diff options
author | Chris Lattner <sabre@nondot.org> | 2010-11-17 07:26:20 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2010-11-17 07:26:20 +0000 |
commit | b0607279cb98bbf2bbfe0db170aed39ef91e86a2 (patch) | |
tree | 69ceb8b0789f7c17c16d67617d67c966ef367cc5 /lib/Lex/Lexer.cpp | |
parent | 75072f2093995eb7ae0c0fa03bd439bbe8429d97 (diff) |
move getSpelling from Preprocessor to Lexer, which it is more conceptually related to.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@119479 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Lex/Lexer.cpp')
-rw-r--r-- | lib/Lex/Lexer.cpp | 101 |
1 files changed, 101 insertions, 0 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 3e68875768..da68495663 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -212,6 +212,107 @@ void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) { } } +//===----------------------------------------------------------------------===// +// Token Spelling +//===----------------------------------------------------------------------===// + +/// getSpelling() - Return the 'spelling' of this token. The spelling of a +/// token are the characters used to represent the token in the source file +/// after trigraph expansion and escaped-newline folding. In particular, this +/// wants to get the true, uncanonicalized, spelling of things like digraphs +/// UCNs, etc. +std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr, + const LangOptions &Features, bool *Invalid) { + assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); + + // If this token contains nothing interesting, return it directly. + bool CharDataInvalid = false; + const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation(), + &CharDataInvalid); + if (Invalid) + *Invalid = CharDataInvalid; + if (CharDataInvalid) + return std::string(); + + if (!Tok.needsCleaning()) + return std::string(TokStart, TokStart+Tok.getLength()); + + std::string Result; + Result.reserve(Tok.getLength()); + + // Otherwise, hard case, relex the characters into the string. + for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); + Ptr != End; ) { + unsigned CharSize; + Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features)); + Ptr += CharSize; + } + assert(Result.size() != unsigned(Tok.getLength()) && + "NeedsCleaning flag set on something that didn't need cleaning!"); + return Result; +} + +/// getSpelling - This method is used to get the spelling of a token into a +/// preallocated buffer, instead of as an std::string. The caller is required +/// to allocate enough space for the token, which is guaranteed to be at least +/// Tok.getLength() bytes long. The actual length of the token is returned. +/// +/// Note that this method may do two possible things: it may either fill in +/// the buffer specified with characters, or it may *change the input pointer* +/// to point to a constant buffer with the data already in it (avoiding a +/// copy). The caller is not allowed to modify the returned buffer pointer +/// if an internal buffer is returned. +unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, + const SourceManager &SourceMgr, + const LangOptions &Features, bool *Invalid) { + assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); + + // If this token is an identifier, just return the string from the identifier + // table, which is very quick. + if (const IdentifierInfo *II = Tok.getIdentifierInfo()) { + Buffer = II->getNameStart(); + return II->getLength(); + } + + // Otherwise, compute the start of the token in the input lexer buffer. + const char *TokStart = 0; + + if (Tok.isLiteral()) + TokStart = Tok.getLiteralData(); + + if (TokStart == 0) { + bool CharDataInvalid = false; + TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid); + if (Invalid) + *Invalid = CharDataInvalid; + if (CharDataInvalid) { + Buffer = ""; + return 0; + } + } + + // If this token contains nothing interesting, return it directly. + if (!Tok.needsCleaning()) { + Buffer = TokStart; + return Tok.getLength(); + } + + // Otherwise, hard case, relex the characters into the string. + char *OutBuf = const_cast<char*>(Buffer); + for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); + Ptr != End; ) { + unsigned CharSize; + *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features); + Ptr += CharSize; + } + assert(unsigned(OutBuf-Buffer) != Tok.getLength() && + "NeedsCleaning flag set on something that didn't need cleaning!"); + + return OutBuf-Buffer; +} + + + static bool isWhitespace(unsigned char c); /// MeasureTokenLength - Relex the token at the specified location and return |