aboutsummaryrefslogtreecommitdiff
path: root/lib/Lex/Lexer.cpp
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2010-11-17 07:26:20 +0000
committerChris Lattner <sabre@nondot.org>2010-11-17 07:26:20 +0000
commitb0607279cb98bbf2bbfe0db170aed39ef91e86a2 (patch)
tree69ceb8b0789f7c17c16d67617d67c966ef367cc5 /lib/Lex/Lexer.cpp
parent75072f2093995eb7ae0c0fa03bd439bbe8429d97 (diff)
move getSpelling from Preprocessor to Lexer, which it is more conceptually related to.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@119479 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Lex/Lexer.cpp')
-rw-r--r--lib/Lex/Lexer.cpp101
1 files changed, 101 insertions, 0 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 3e68875768..da68495663 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -212,6 +212,107 @@ void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) {
}
}
+//===----------------------------------------------------------------------===//
+// Token Spelling
+//===----------------------------------------------------------------------===//
+
+/// getSpelling() - Return the 'spelling' of this token as a std::string. The
+/// spelling of a token is the sequence of characters used to represent it in
+/// the source file after trigraph expansion and escaped-newline folding. In
+/// particular, this yields the true, uncanonicalized spelling of things like
+/// digraphs, UCNs, etc. Returns an empty string if the source data is invalid.
+std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr,
+ const LangOptions &Features, bool *Invalid) {
+ assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
+
+ // Look up the character data at the token's location.
+ bool CharDataInvalid = false;
+ const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation(),
+ &CharDataInvalid);
+ if (Invalid) // Invalid is optional; report only when the caller passed one.
+ *Invalid = CharDataInvalid;
+ if (CharDataInvalid)
+ return std::string(); // No usable character data: empty spelling.
+
+ if (!Tok.needsCleaning()) // Fast path: the raw characters are the spelling.
+ return std::string(TokStart, TokStart+Tok.getLength());
+
+ std::string Result;
+ Result.reserve(Tok.getLength());
+
+ // Hard case: relex the token, appending one decoded character per iteration.
+ for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
+ Ptr != End; ) {
+ unsigned CharSize; // Passed to the call below, then used to advance Ptr.
+ Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features));
+ Ptr += CharSize;
+ }
+ assert(Result.size() != unsigned(Tok.getLength()) &&
+ "NeedsCleaning flag set on something that didn't need cleaning!");
+ return Result;
+}
+
+/// getSpelling - This method is used to get the spelling of a token into a
+/// preallocated buffer, instead of as an std::string. The caller is required
+/// to provide a buffer of at least Tok.getLength() bytes. The actual length
+/// of the spelling is returned.
+///
+/// Note that this method may do one of two things: it may either fill in the
+/// buffer specified with characters, or it may *change the input pointer* to
+/// point to a constant buffer with the data already in it (avoiding a copy).
+/// The caller must not write through the returned pointer in that case. If
+/// the character data is invalid, Buffer is set to an empty string, returns 0.
+unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
+ const SourceManager &SourceMgr,
+ const LangOptions &Features, bool *Invalid) {
+ assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
+
+ // If this token is an identifier, just return the string from the identifier
+ // table, which is very quick.
+ if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
+ Buffer = II->getNameStart();
+ return II->getLength(); // NOTE: *Invalid is left untouched on this path.
+ }
+
+ // Otherwise, compute the start of the token in the input lexer buffer.
+ const char *TokStart = 0;
+
+ if (Tok.isLiteral()) // Literal tokens may carry a pointer to their text.
+ TokStart = Tok.getLiteralData();
+
+ if (TokStart == 0) { // No literal data: ask the SourceManager instead.
+ bool CharDataInvalid = false;
+ TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid);
+ if (Invalid)
+ *Invalid = CharDataInvalid;
+ if (CharDataInvalid) {
+ Buffer = "";
+ return 0;
+ }
+ }
+
+ // If this token contains nothing interesting, return it directly.
+ if (!Tok.needsCleaning()) {
+ Buffer = TokStart; // Repoint the caller at the existing data; no copy.
+ return Tok.getLength();
+ }
+
+ // Otherwise, hard case, relex the characters into the caller's buffer.
+ char *OutBuf = const_cast<char*>(Buffer); // Caller-provided storage.
+ for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
+ Ptr != End; ) {
+ unsigned CharSize; // Passed to the call below, then used to advance Ptr.
+ *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features);
+ Ptr += CharSize;
+ }
+ assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
+ "NeedsCleaning flag set on something that didn't need cleaning!");
+
+ return OutBuf-Buffer; // Number of characters written to the buffer.
+}
+
+
+
static bool isWhitespace(unsigned char c);
/// MeasureTokenLength - Relex the token at the specified location and return