diff options
author | Reid Spencer <rspencer@reidspencer.com> | 2007-07-11 17:01:13 +0000 |
---|---|---|
committer | Reid Spencer <rspencer@reidspencer.com> | 2007-07-11 17:01:13 +0000 |
commit | 5f016e2cb5d11daeb237544de1c5d59f20fe1a6e (patch) | |
tree | 8b6bfcb8783d16827f896d5facbd4549300e8a1e /Lex/Preprocessor.cpp | |
parent | a5f182095bf2065ca94f1c86957ee91f9068964b (diff) |
Stage two of getting CFE top correct.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@39734 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'Lex/Preprocessor.cpp')
-rw-r--r-- | Lex/Preprocessor.cpp | 2087 |
1 files changed, 2087 insertions, 0 deletions
diff --git a/Lex/Preprocessor.cpp b/Lex/Preprocessor.cpp new file mode 100644 index 0000000000..104fb65152 --- /dev/null +++ b/Lex/Preprocessor.cpp @@ -0,0 +1,2087 @@ +//===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Preprocessor interface. +// +//===----------------------------------------------------------------------===// +// +// Options to support: +// -H - Print the name of each header file used. +// -d[MDNI] - Dump various things. +// -fworking-directory - #line's with preprocessor's working dir. +// -fpreprocessed +// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD +// -W* +// -w +// +// Messages to emit: +// "Multiple include guards may be useful for:\n" +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/PPCallbacks.h" +#include "clang/Lex/Pragma.h" +#include "clang/Lex/ScratchBuffer.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/SmallVector.h" +#include <iostream> +using namespace clang; + +//===----------------------------------------------------------------------===// + +Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts, + TargetInfo &target, SourceManager &SM, + HeaderSearch &Headers) + : Diags(diags), Features(opts), Target(target), FileMgr(Headers.getFileMgr()), + SourceMgr(SM), HeaderInfo(Headers), Identifiers(opts), + CurLexer(0), CurDirLookup(0), CurMacroExpander(0), Callbacks(0) { + ScratchBuf = new ScratchBuffer(SourceMgr); + + // Clear stats. + NumDirectives = NumDefined = NumUndefined = NumPragma = 0; + NumIf = NumElse = NumEndif = 0; + NumEnteredSourceFiles = 0; + NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0; + NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0; + MaxIncludeStackDepth = 0; + NumSkipped = 0; + + // Default to discarding comments. + KeepComments = false; + KeepMacroComments = false; + + // Macro expansion is enabled. + DisableMacroExpansion = false; + InMacroArgs = false; + + // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. + // This gets unpoisoned where it is allowed. + (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); + + // Initialize the pragma handlers. + PragmaHandlers = new PragmaNamespace(0); + RegisterBuiltinPragmas(); + + // Initialize builtin macros like __LINE__ and friends. + RegisterBuiltinMacros(); +} + +Preprocessor::~Preprocessor() { + // Free any active lexers. + delete CurLexer; + + while (!IncludeMacroStack.empty()) { + delete IncludeMacroStack.back().TheLexer; + delete IncludeMacroStack.back().TheMacroExpander; + IncludeMacroStack.pop_back(); + } + + // Release pragma information. + delete PragmaHandlers; + + // Delete the scratch buffer info. + delete ScratchBuf; +} + +PPCallbacks::~PPCallbacks() { +} + +/// Diag - Forwarding function for diagnostics. This emits a diagnostic at +/// the specified LexerToken's location, translating the token's start +/// position in the current buffer into a SourcePosition object for rendering. +void Preprocessor::Diag(SourceLocation Loc, unsigned DiagID) { + Diags.Report(Loc, DiagID); +} + +void Preprocessor::Diag(SourceLocation Loc, unsigned DiagID, + const std::string &Msg) { + Diags.Report(Loc, DiagID, &Msg, 1); +} + +void Preprocessor::DumpToken(const LexerToken &Tok, bool DumpFlags) const { + std::cerr << tok::getTokenName(Tok.getKind()) << " '" + << getSpelling(Tok) << "'"; + + if (!DumpFlags) return; + std::cerr << "\t"; + if (Tok.isAtStartOfLine()) + std::cerr << " [StartOfLine]"; + if (Tok.hasLeadingSpace()) + std::cerr << " [LeadingSpace]"; + if (Tok.isExpandDisabled()) + std::cerr << " [ExpandDisabled]"; + if (Tok.needsCleaning()) { + const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); + std::cerr << " [UnClean='" << std::string(Start, Start+Tok.getLength()) + << "']"; + } +} + +void Preprocessor::DumpMacro(const MacroInfo &MI) const { + std::cerr << "MACRO: "; + for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { + DumpToken(MI.getReplacementToken(i)); + std::cerr << " "; + } + std::cerr << "\n"; +} + +void Preprocessor::PrintStats() { + std::cerr << "\n*** Preprocessor Stats:\n"; + std::cerr << NumDirectives << " directives found:\n"; + std::cerr << " " << NumDefined << " #define.\n"; + std::cerr << " " << NumUndefined << " #undef.\n"; + std::cerr << " #include/#include_next/#import:\n"; + std::cerr << " " << NumEnteredSourceFiles << " source files entered.\n"; + std::cerr << " " << MaxIncludeStackDepth << " max include stack depth\n"; + std::cerr << " " << NumIf << " #if/#ifndef/#ifdef.\n"; + std::cerr << " " << NumElse << " #else/#elif.\n"; + std::cerr << " " << NumEndif << " #endif.\n"; + std::cerr << " " << NumPragma << " #pragma.\n"; + std::cerr << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; + + std::cerr << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" + << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " + << NumFastMacroExpanded << " on the fast path.\n"; + std::cerr << (NumFastTokenPaste+NumTokenPaste) + << " token paste (##) operations performed, " + << NumFastTokenPaste << " on the fast path.\n"; +} + +//===----------------------------------------------------------------------===// +// Token Spelling +//===----------------------------------------------------------------------===// + + +/// getSpelling() - Return the 'spelling' of this token. The spelling of a +/// token are the characters used to represent the token in the source file +/// after trigraph expansion and escaped-newline folding. In particular, this +/// wants to get the true, uncanonicalized, spelling of things like digraphs +/// UCNs, etc. +std::string Preprocessor::getSpelling(const LexerToken &Tok) const { + assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); + + // If this token contains nothing interesting, return it directly. + const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation()); + if (!Tok.needsCleaning()) + return std::string(TokStart, TokStart+Tok.getLength()); + + std::string Result; + Result.reserve(Tok.getLength()); + + // Otherwise, hard case, relex the characters into the string. + for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); + Ptr != End; ) { + unsigned CharSize; + Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features)); + Ptr += CharSize; + } + assert(Result.size() != unsigned(Tok.getLength()) && + "NeedsCleaning flag set on something that didn't need cleaning!"); + return Result; +} + +/// getSpelling - This method is used to get the spelling of a token into a +/// preallocated buffer, instead of as an std::string. The caller is required +/// to allocate enough space for the token, which is guaranteed to be at least +/// Tok.getLength() bytes long. The actual length of the token is returned. +/// +/// Note that this method may do two possible things: it may either fill in +/// the buffer specified with characters, or it may *change the input pointer* +/// to point to a constant buffer with the data already in it (avoiding a +/// copy). The caller is not allowed to modify the returned buffer pointer +/// if an internal buffer is returned. +unsigned Preprocessor::getSpelling(const LexerToken &Tok, + const char *&Buffer) const { + assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); + + // If this token is an identifier, just return the string from the identifier + // table, which is very quick. + if (const IdentifierInfo *II = Tok.getIdentifierInfo()) { + Buffer = II->getName(); + return Tok.getLength(); + } + + // Otherwise, compute the start of the token in the input lexer buffer. + const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation()); + + // If this token contains nothing interesting, return it directly. + if (!Tok.needsCleaning()) { + Buffer = TokStart; + return Tok.getLength(); + } + // Otherwise, hard case, relex the characters into the string. + char *OutBuf = const_cast<char*>(Buffer); + for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); + Ptr != End; ) { + unsigned CharSize; + *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features); + Ptr += CharSize; + } + assert(unsigned(OutBuf-Buffer) != Tok.getLength() && + "NeedsCleaning flag set on something that didn't need cleaning!"); + + return OutBuf-Buffer; +} + + +/// CreateString - Plop the specified string into a scratch buffer and return a +/// location for it. If specified, the source location provides a source +/// location for the token. +SourceLocation Preprocessor:: +CreateString(const char *Buf, unsigned Len, SourceLocation SLoc) { + if (SLoc.isValid()) + return ScratchBuf->getToken(Buf, Len, SLoc); + return ScratchBuf->getToken(Buf, Len); +} + + +//===----------------------------------------------------------------------===// +// Source File Location Methods. +//===----------------------------------------------------------------------===// + +/// LookupFile - Given a "foo" or <foo> reference, look up the indicated file, +/// return null on failure. isAngled indicates whether the file reference is +/// for system #include's or not (i.e. using <> instead of ""). +const FileEntry *Preprocessor::LookupFile(const char *FilenameStart, + const char *FilenameEnd, + bool isAngled, + const DirectoryLookup *FromDir, + const DirectoryLookup *&CurDir) { + // If the header lookup mechanism may be relative to the current file, pass in + // info about where the current file is. + const FileEntry *CurFileEnt = 0; + if (!FromDir) { + unsigned TheFileID = getCurrentFileLexer()->getCurFileID(); + CurFileEnt = SourceMgr.getFileEntryForFileID(TheFileID); + } + + // Do a standard file entry lookup. + CurDir = CurDirLookup; + const FileEntry *FE = + HeaderInfo.LookupFile(FilenameStart, FilenameEnd, + isAngled, FromDir, CurDir, CurFileEnt); + if (FE) return FE; + + // Otherwise, see if this is a subframework header. If so, this is relative + // to one of the headers on the #include stack. Walk the list of the current + // headers on the #include stack and pass them to HeaderInfo. + if (CurLexer && !CurLexer->Is_PragmaLexer) { + CurFileEnt = SourceMgr.getFileEntryForFileID(CurLexer->getCurFileID()); + if ((FE = HeaderInfo.LookupSubframeworkHeader(FilenameStart, FilenameEnd, + CurFileEnt))) + return FE; + } + + for (unsigned i = 0, e = IncludeMacroStack.size(); i != e; ++i) { + IncludeStackInfo &ISEntry = IncludeMacroStack[e-i-1]; + if (ISEntry.TheLexer && !ISEntry.TheLexer->Is_PragmaLexer) { + CurFileEnt = + SourceMgr.getFileEntryForFileID(ISEntry.TheLexer->getCurFileID()); + if ((FE = HeaderInfo.LookupSubframeworkHeader(FilenameStart, FilenameEnd, + CurFileEnt))) + return FE; + } + } + + // Otherwise, we really couldn't find the file. + return 0; +} + +/// isInPrimaryFile - Return true if we're in the top-level file, not in a +/// #include. +bool Preprocessor::isInPrimaryFile() const { + if (CurLexer && !CurLexer->Is_PragmaLexer) + return CurLexer->isMainFile(); + + // If there are any stacked lexers, we're in a #include. + for (unsigned i = 0, e = IncludeMacroStack.size(); i != e; ++i) + if (IncludeMacroStack[i].TheLexer && + !IncludeMacroStack[i].TheLexer->Is_PragmaLexer) + return IncludeMacroStack[i].TheLexer->isMainFile(); + return false; +} + +/// getCurrentLexer - Return the current file lexer being lexed from. Note +/// that this ignores any potentially active macro expansions and _Pragma +/// expansions going on at the time. +Lexer *Preprocessor::getCurrentFileLexer() const { + if (CurLexer && !CurLexer->Is_PragmaLexer) return CurLexer; + + // Look for a stacked lexer. + for (unsigned i = IncludeMacroStack.size(); i != 0; --i) { + Lexer *L = IncludeMacroStack[i-1].TheLexer; + if (L && !L->Is_PragmaLexer) // Ignore macro & _Pragma expansions. + return L; + } + return 0; +} + + +/// EnterSourceFile - Add a source file to the top of the include stack and +/// start lexing tokens from it instead of the current buffer. Return true +/// on failure. +void Preprocessor::EnterSourceFile(unsigned FileID, + const DirectoryLookup *CurDir, + bool isMainFile) { + assert(CurMacroExpander == 0 && "Cannot #include a file inside a macro!"); + ++NumEnteredSourceFiles; + + if (MaxIncludeStackDepth < IncludeMacroStack.size()) + MaxIncludeStackDepth = IncludeMacroStack.size(); + + const llvm::MemoryBuffer *Buffer = SourceMgr.getBuffer(FileID); + Lexer *TheLexer = new Lexer(Buffer, FileID, *this); + if (isMainFile) TheLexer->setIsMainFile(); + EnterSourceFileWithLexer(TheLexer, CurDir); +} + +/// EnterSourceFile - Add a source file to the top of the include stack and +/// start lexing tokens from it instead of the current buffer. +void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer, + const DirectoryLookup *CurDir) { + + // Add the current lexer to the include stack. + if (CurLexer || CurMacroExpander) + IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup, + CurMacroExpander)); + + CurLexer = TheLexer; + CurDirLookup = CurDir; + CurMacroExpander = 0; + + // Notify the client, if desired, that we are in a new source file. + if (Callbacks && !CurLexer->Is_PragmaLexer) { + DirectoryLookup::DirType FileType = DirectoryLookup::NormalHeaderDir; + + // Get the file entry for the current file. + if (const FileEntry *FE = + SourceMgr.getFileEntryForFileID(CurLexer->getCurFileID())) + FileType = HeaderInfo.getFileDirFlavor(FE); + + Callbacks->FileChanged(SourceLocation(CurLexer->getCurFileID(), 0), + PPCallbacks::EnterFile, FileType); + } +} + + + +/// EnterMacro - Add a Macro to the top of the include stack and start lexing +/// tokens from it instead of the current buffer. +void Preprocessor::EnterMacro(LexerToken &Tok, MacroArgs *Args) { + IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup, + CurMacroExpander)); + CurLexer = 0; + CurDirLookup = 0; + + CurMacroExpander = new MacroExpander(Tok, Args, *this); +} + +/// EnterTokenStream - Add a "macro" context to the top of the include stack, +/// which will cause the lexer to start returning the specified tokens. Note +/// that these tokens will be re-macro-expanded when/if expansion is enabled. +/// This method assumes that the specified stream of tokens has a permanent +/// owner somewhere, so they do not need to be copied. +void Preprocessor::EnterTokenStream(const LexerToken *Toks, unsigned NumToks) { + // Save our current state. + IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup, + CurMacroExpander)); + CurLexer = 0; + CurDirLookup = 0; + + // Create a macro expander to expand from the specified token stream. + CurMacroExpander = new MacroExpander(Toks, NumToks, *this); +} + +/// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the +/// lexer stack. This should only be used in situations where the current +/// state of the top-of-stack lexer is known. +void Preprocessor::RemoveTopOfLexerStack() { + assert(!IncludeMacroStack.empty() && "Ran out of stack entries to load"); + delete CurLexer; + delete CurMacroExpander; + CurLexer = IncludeMacroStack.back().TheLexer; + CurDirLookup = IncludeMacroStack.back().TheDirLookup; + CurMacroExpander = IncludeMacroStack.back().TheMacroExpander; + IncludeMacroStack.pop_back(); +} + +//===----------------------------------------------------------------------===// +// Macro Expansion Handling. +//===----------------------------------------------------------------------===// + +/// RegisterBuiltinMacro - Register the specified identifier in the identifier +/// table and mark it as a builtin macro to be expanded. +IdentifierInfo *Preprocessor::RegisterBuiltinMacro(const char *Name) { + // Get the identifier. + IdentifierInfo *Id = getIdentifierInfo(Name); + + // Mark it as being a macro that is builtin. + MacroInfo *MI = new MacroInfo(SourceLocation()); + MI->setIsBuiltinMacro(); + Id->setMacroInfo(MI); + return Id; +} + + +/// RegisterBuiltinMacros - Register builtin macros, such as __LINE__ with the +/// identifier table. +void Preprocessor::RegisterBuiltinMacros() { + Ident__LINE__ = RegisterBuiltinMacro("__LINE__"); + Ident__FILE__ = RegisterBuiltinMacro("__FILE__"); + Ident__DATE__ = RegisterBuiltinMacro("__DATE__"); + Ident__TIME__ = RegisterBuiltinMacro("__TIME__"); + Ident_Pragma = RegisterBuiltinMacro("_Pragma"); + + // GCC Extensions. + Ident__BASE_FILE__ = RegisterBuiltinMacro("__BASE_FILE__"); + Ident__INCLUDE_LEVEL__ = RegisterBuiltinMacro("__INCLUDE_LEVEL__"); + Ident__TIMESTAMP__ = RegisterBuiltinMacro("__TIMESTAMP__"); +} + +/// isTrivialSingleTokenExpansion - Return true if MI, which has a single token +/// in its expansion, currently expands to that token literally. +static bool isTrivialSingleTokenExpansion(const MacroInfo *MI, + const IdentifierInfo *MacroIdent) { + IdentifierInfo *II = MI->getReplacementToken(0).getIdentifierInfo(); + + // If the token isn't an identifier, it's always literally expanded. + if (II == 0) return true; + + // If the identifier is a macro, and if that macro is enabled, it may be + // expanded so it's not a trivial expansion. + if (II->getMacroInfo() && II->getMacroInfo()->isEnabled() && + // Fast expanding "#define X X" is ok, because X would be disabled. + II != MacroIdent) + return false; + + // If this is an object-like macro invocation, it is safe to trivially expand + // it. + if (MI->isObjectLike()) return true; + + // If this is a function-like macro invocation, it's safe to trivially expand + // as long as the identifier is not a macro argument. + for (MacroInfo::arg_iterator I = MI->arg_begin(), E = MI->arg_end(); + I != E; ++I) + if (*I == II) + return false; // Identifier is a macro argument. + + return true; +} + + +/// isNextPPTokenLParen - Determine whether the next preprocessor token to be +/// lexed is a '('. If so, consume the token and return true, if not, this +/// method should have no observable side-effect on the lexed tokens. +bool Preprocessor::isNextPPTokenLParen() { + // Do some quick tests for rejection cases. + unsigned Val; + if (CurLexer) + Val = CurLexer->isNextPPTokenLParen(); + else + Val = CurMacroExpander->isNextTokenLParen(); + + if (Val == 2) { + // If we ran off the end of the lexer or macro expander, walk the include + // stack, looking for whatever will return the next token. + for (unsigned i = IncludeMacroStack.size(); Val == 2 && i != 0; --i) { + IncludeStackInfo &Entry = IncludeMacroStack[i-1]; + if (Entry.TheLexer) + Val = Entry.TheLexer->isNextPPTokenLParen(); + else + Val = Entry.TheMacroExpander->isNextTokenLParen(); + } + } + + // Okay, if we know that the token is a '(', lex it and return. Otherwise we + // have found something that isn't a '(' or we found the end of the + // translation unit. In either case, return false. + if (Val != 1) + return false; + + LexerToken Tok; + LexUnexpandedToken(Tok); + assert(Tok.getKind() == tok::l_paren && "Error computing l-paren-ness?"); + return true; +} + +/// HandleMacroExpandedIdentifier - If an identifier token is read that is to be +/// expanded as a macro, handle it and return the next token as 'Identifier'. +bool Preprocessor::HandleMacroExpandedIdentifier(LexerToken &Identifier, + MacroInfo *MI) { + + // If this is a builtin macro, like __LINE__ or _Pragma, handle it specially. + if (MI->isBuiltinMacro()) { + ExpandBuiltinMacro(Identifier); + return false; + } + + // If this is the first use of a target-specific macro, warn about it. + if (MI->isTargetSpecific()) { + MI->setIsTargetSpecific(false); // Don't warn on second use. + getTargetInfo().DiagnoseNonPortability(Identifier.getLocation(), + diag::port_target_macro_use); + } + + /// Args - If this is a function-like macro expansion, this contains, + /// for each macro argument, the list of tokens that were provided to the + /// invocation. + MacroArgs *Args = 0; + + // If this is a function-like macro, read the arguments. + if (MI->isFunctionLike()) { + // C99 6.10.3p10: If the preprocessing token immediately after the the macro + // name isn't a '(', this macro should not be expanded. + if (!isNextPPTokenLParen()) + return true; + + // Remember that we are now parsing the arguments to a macro invocation. + // Preprocessor directives used inside macro arguments are not portable, and + // this enables the warning. + InMacroArgs = true; + Args = ReadFunctionLikeMacroArgs(Identifier, MI); + + // Finished parsing args. + InMacroArgs = false; + + // If there was an error parsing the arguments, bail out. + if (Args == 0) return false; + + ++NumFnMacroExpanded; + } else { + ++NumMacroExpanded; + } + + // Notice that this macro has been used. + MI->setIsUsed(true); + + // If we started lexing a macro, enter the macro expansion body. + + // If this macro expands to no tokens, don't bother to push it onto the + // expansion stack, only to take it right back off. + if (MI->getNumTokens() == 0) { + // No need for arg info. + if (Args) Args->destroy(); + + // Ignore this macro use, just return the next token in the current + // buffer. + bool HadLeadingSpace = Identifier.hasLeadingSpace(); + bool IsAtStartOfLine = Identifier.isAtStartOfLine(); + + Lex(Identifier); + + // If the identifier isn't on some OTHER line, inherit the leading + // whitespace/first-on-a-line property of this token. This handles + // stuff like "! XX," -> "! ," and " XX," -> " ,", when XX is + // empty. + if (!Identifier.isAtStartOfLine()) { + if (IsAtStartOfLine) Identifier.setFlag(LexerToken::StartOfLine); + if (HadLeadingSpace) Identifier.setFlag(LexerToken::LeadingSpace); + } + ++NumFastMacroExpanded; + return false; + + } else if (MI->getNumTokens() == 1 && + isTrivialSingleTokenExpansion(MI, Identifier.getIdentifierInfo())){ + // Otherwise, if this macro expands into a single trivially-expanded + // token: expand it now. This handles common cases like + // "#define VAL 42". + + // Propagate the isAtStartOfLine/hasLeadingSpace markers of the macro + // identifier to the expanded token. + bool isAtStartOfLine = Identifier.isAtStartOfLine(); + bool hasLeadingSpace = Identifier.hasLeadingSpace(); + + // Remember where the token is instantiated. + SourceLocation InstantiateLoc = Identifier.getLocation(); + + // Replace the result token. + Identifier = MI->getReplacementToken(0); + + // Restore the StartOfLine/LeadingSpace markers. + Identifier.setFlagValue(LexerToken::StartOfLine , isAtStartOfLine); + Identifier.setFlagValue(LexerToken::LeadingSpace, hasLeadingSpace); + + // Update the tokens location to include both its logical and physical + // locations. + SourceLocation Loc = + SourceMgr.getInstantiationLoc(Identifier.getLocation(), InstantiateLoc); + Identifier.setLocation(Loc); + + // If this is #define X X, we must mark the result as unexpandible. + if (IdentifierInfo *NewII = Identifier.getIdentifierInfo()) + if (NewII->getMacroInfo() == MI) + Identifier.setFlag(LexerToken::DisableExpand); + + // Since this is not an identifier token, it can't be macro expanded, so + // we're done. + ++NumFastMacroExpanded; + return false; + } + + // Start expanding the macro. + EnterMacro(Identifier, Args); + + // Now that the macro is at the top of the include stack, ask the + // preprocessor to read the next token from it. + Lex(Identifier); + return false; +} + +/// ReadFunctionLikeMacroArgs - After reading "MACRO(", this method is +/// invoked to read all of the actual arguments specified for the macro +/// invocation. This returns null on error. +MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(LexerToken &MacroName, + MacroInfo *MI) { + // The number of fixed arguments to parse. + unsigned NumFixedArgsLeft = MI->getNumArgs(); + bool isVariadic = MI->isVariadic(); + + // Outer loop, while there are more arguments, keep reading them. + LexerToken Tok; + Tok.setKind(tok::comma); + --NumFixedArgsLeft; // Start reading the first arg. + + // ArgTokens - Build up a list of tokens that make up each argument. Each + // argument is separated by an EOF token. Use a SmallVector so we can avoid + // heap allocations in the common case. + llvm::SmallVector<LexerToken, 64> ArgTokens; + + unsigned NumActuals = 0; + while (Tok.getKind() == tok::comma) { + // C99 6.10.3p11: Keep track of the number of l_parens we have seen. + unsigned NumParens = 0; + + while (1) { + // Read arguments as unexpanded tokens. This avoids issues, e.g., where + // an argument value in a macro could expand to ',' or '(' or ')'. + LexUnexpandedToken(Tok); + + if (Tok.getKind() == tok::eof) { + Diag(MacroName, diag::err_unterm_macro_invoc); + // Do not lose the EOF. Return it to the client. + MacroName = Tok; + return 0; + } else if (Tok.getKind() == tok::r_paren) { + // If we found the ) token, the macro arg list is done. + if (NumParens-- == 0) + break; + } else if (Tok.getKind() == tok::l_paren) { + ++NumParens; + } else if (Tok.getKind() == tok::comma && NumParens == 0) { + // Comma ends this argument if there are more fixed arguments expected. + if (NumFixedArgsLeft) + break; + + // If this is not a variadic macro, too many args were specified. + if (!isVariadic) { + // Emit the diagnostic at the macro name in case there is a missing ). + // Emitting it at the , could be far away from the macro name. + Diag(MacroName, diag::err_too_many_args_in_macro_invoc); + return 0; + } + // Otherwise, continue to add the tokens to this variable argument. + } else if (Tok.getKind() == tok::comment && !KeepMacroComments) { + // If this is a comment token in the argument list and we're just in + // -C mode (not -CC mode), discard the comment. + continue; + } + + ArgTokens.push_back(Tok); + } + + // Empty arguments are standard in C99 and supported as an extension in + // other modes. + if (ArgTokens.empty() && !Features.C99) + Diag(Tok, diag::ext_empty_fnmacro_arg); + + // Add a marker EOF token to the end of the token list for this argument. + LexerToken EOFTok; + EOFTok.startToken(); + EOFTok.setKind(tok::eof); + EOFTok.setLocation(Tok.getLocation()); + EOFTok.setLength(0); + ArgTokens.push_back(EOFTok); + ++NumActuals; + --NumFixedArgsLeft; + }; + + // Okay, we either found the r_paren. Check to see if we parsed too few + // arguments. + unsigned MinArgsExpected = MI->getNumArgs(); + + // See MacroArgs instance var for description of this. + bool isVarargsElided = false; + + if (NumActuals < MinArgsExpected) { + // There are several cases where too few arguments is ok, handle them now. + if (NumActuals+1 == MinArgsExpected && MI->isVariadic()) { + // Varargs where the named vararg parameter is missing: ok as extension. + // #define A(x, ...) + // A("blah") + Diag(Tok, diag::ext_missing_varargs_arg); + + // Remember this occurred if this is a C99 macro invocation with at least + // one actual argument. + isVarargsElided = MI->isC99Varargs() && MI->getNumArgs() > 1; + } else if (MI->getNumArgs() == 1) { + // #define A(x) + // A() + // is ok because it is an empty argument. + + // Empty arguments are standard in C99 and supported as an extension in + // other modes. + if (ArgTokens.empty() && !Features.C99) + Diag(Tok, diag::ext_empty_fnmacro_arg); + } else { + // Otherwise, emit the error. + Diag(Tok, diag::err_too_few_args_in_macro_invoc); + return 0; + } + + // Add a marker EOF token to the end of the token list for this argument. + SourceLocation EndLoc = Tok.getLocation(); + Tok.startToken(); + Tok.setKind(tok::eof); + Tok.setLocation(EndLoc); + Tok.setLength(0); + ArgTokens.push_back(Tok); + } + + return MacroArgs::create(MI, &ArgTokens[0], ArgTokens.size(),isVarargsElided); +} + +/// ComputeDATE_TIME - Compute the current time, enter it into the specified +/// scratch buffer, then return DATELoc/TIMELoc locations with the position of +/// the identifier tokens inserted. +static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc, + Preprocessor &PP) { + time_t TT = time(0); + struct tm *TM = localtime(&TT); + + static const char * const Months[] = { + "Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec" + }; + + char TmpBuffer[100]; + sprintf(TmpBuffer, "\"%s %2d %4d\"", Months[TM->tm_mon], TM->tm_mday, + TM->tm_year+1900); + DATELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer)); + + sprintf(TmpBuffer, "\"%02d:%02d:%02d\"", TM->tm_hour, TM->tm_min, TM->tm_sec); + TIMELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer)); +} + +/// ExpandBuiltinMacro - If an identifier token is read that is to be expanded +/// as a builtin macro, handle it and return the next token as 'Tok'. +void Preprocessor::ExpandBuiltinMacro(LexerToken &Tok) { + // Figure out which token this is. + IdentifierInfo *II = Tok.getIdentifierInfo(); + assert(II && "Can't be a macro without id info!"); + + // If this is an _Pragma directive, expand it, invoke the pragma handler, then + // lex the token after it. + if (II == Ident_Pragma) + return Handle_Pragma(Tok); + + ++NumBuiltinMacroExpanded; + + char TmpBuffer[100]; + + // Set up the return result. + Tok.setIdentifierInfo(0); + Tok.clearFlag(LexerToken::NeedsCleaning); + + if (II == Ident__LINE__) { + // __LINE__ expands to a simple numeric value. + sprintf(TmpBuffer, "%u", SourceMgr.getLineNumber(Tok.getLocation())); + unsigned Length = strlen(TmpBuffer); + Tok.setKind(tok::numeric_constant); + Tok.setLength(Length); + Tok.setLocation(CreateString(TmpBuffer, Length, Tok.getLocation())); + } else if (II == Ident__FILE__ || II == Ident__BASE_FILE__) { + SourceLocation Loc = Tok.getLocation(); + if (II == Ident__BASE_FILE__) { + Diag(Tok, diag::ext_pp_base_file); + SourceLocation NextLoc = SourceMgr.getIncludeLoc(Loc.getFileID()); + while (NextLoc.getFileID() != 0) { + Loc = NextLoc; + NextLoc = SourceMgr.getIncludeLoc(Loc.getFileID()); + } + } + + // Escape this filename. Turn '\' -> '\\' '"' -> '\"' + std::string FN = SourceMgr.getSourceName(Loc); + FN = '"' + Lexer::Stringify(FN) + '"'; + Tok.setKind(tok::string_literal); + Tok.setLength(FN.size()); + Tok.setLocation(CreateString(&FN[0], FN.size(), Tok.getLocation())); + } else if (II == Ident__DATE__) { + if (!DATELoc.isValid()) + ComputeDATE_TIME(DATELoc, TIMELoc, *this); + Tok.setKind(tok::string_literal); + Tok.setLength(strlen("\"Mmm dd yyyy\"")); + Tok.setLocation(SourceMgr.getInstantiationLoc(DATELoc, Tok.getLocation())); + } else if (II == Ident__TIME__) { + if (!TIMELoc.isValid()) + ComputeDATE_TIME(DATELoc, TIMELoc, *this); + Tok.setKind(tok::string_literal); + Tok.setLength(strlen("\"hh:mm:ss\"")); + Tok.setLocation(SourceMgr.getInstantiationLoc(TIMELoc, Tok.getLocation())); + } else if (II == Ident__INCLUDE_LEVEL__) { + Diag(Tok, diag::ext_pp_include_level); + + // Compute the include depth of this token. + unsigned Depth = 0; + SourceLocation Loc = SourceMgr.getIncludeLoc(Tok.getLocation().getFileID()); + for (; Loc.getFileID() != 0; ++Depth) + Loc = SourceMgr.getIncludeLoc(Loc.getFileID()); + + // __INCLUDE_LEVEL__ expands to a simple numeric value. + sprintf(TmpBuffer, "%u", Depth); + unsigned Length = strlen(TmpBuffer); + Tok.setKind(tok::numeric_constant); + Tok.setLength(Length); + Tok.setLocation(CreateString(TmpBuffer, Length, Tok.getLocation())); + } else if (II == Ident__TIMESTAMP__) { + // MSVC, ICC, GCC, VisualAge C++ extension. The generated string should be + // of the form "Ddd Mmm dd hh::mm::ss yyyy", which is returned by asctime. + Diag(Tok, diag::ext_pp_timestamp); + + // Get the file that we are lexing out of. If we're currently lexing from + // a macro, dig into the include stack. + const FileEntry *CurFile = 0; + Lexer *TheLexer = getCurrentFileLexer(); + + if (TheLexer) + CurFile = SourceMgr.getFileEntryForFileID(TheLexer->getCurFileID()); + + // If this file is older than the file it depends on, emit a diagnostic. + const char *Result; + if (CurFile) { + time_t TT = CurFile->getModificationTime(); + struct tm *TM = localtime(&TT); + Result = asctime(TM); + } else { + Result = "??? ??? ?? ??:??:?? ????\n"; + } + TmpBuffer[0] = '"'; + strcpy(TmpBuffer+1, Result); + unsigned Len = strlen(TmpBuffer); + TmpBuffer[Len-1] = '"'; // Replace the newline with a quote. + Tok.setKind(tok::string_literal); + Tok.setLength(Len); + Tok.setLocation(CreateString(TmpBuffer, Len, Tok.getLocation())); + } else { + assert(0 && "Unknown identifier!"); + } +} + +//===----------------------------------------------------------------------===// +// Lexer Event Handling. +//===----------------------------------------------------------------------===// + +/// LookUpIdentifierInfo - Given a tok::identifier token, look up the +/// identifier information for the token and install it into the token. +IdentifierInfo *Preprocessor::LookUpIdentifierInfo(LexerToken &Identifier, + const char *BufPtr) { + assert(Identifier.getKind() == tok::identifier && "Not an identifier!"); + assert(Identifier.getIdentifierInfo() == 0 && "Identinfo already exists!"); + + // Look up this token, see if it is a macro, or if it is a language keyword. + IdentifierInfo *II; + if (BufPtr && !Identifier.needsCleaning()) { + // No cleaning needed, just use the characters from the lexed buffer. + II = getIdentifierInfo(BufPtr, BufPtr+Identifier.getLength()); + } else { + // Cleaning needed, alloca a buffer, clean into it, then use the buffer. + const char *TmpBuf = (char*)alloca(Identifier.getLength()); + unsigned Size = getSpelling(Identifier, TmpBuf); + II = getIdentifierInfo(TmpBuf, TmpBuf+Size); + } + Identifier.setIdentifierInfo(II); + return II; +} + + +/// HandleIdentifier - This callback is invoked when the lexer reads an +/// identifier. This callback looks up the identifier in the map and/or +/// potentially macro expands it or turns it into a named token (like 'for'). +void Preprocessor::HandleIdentifier(LexerToken &Identifier) { + assert(Identifier.getIdentifierInfo() && + "Can't handle identifiers without identifier info!"); + + IdentifierInfo &II = *Identifier.getIdentifierInfo( |