author    | John McCall <rjmccall@apple.com> | 2011-03-08 07:59:04 +0000
committer | John McCall <rjmccall@apple.com> | 2011-03-08 07:59:04 +0000
commit    | 834e3f6c77d9ac03997a3f0c56934edcf406a355 (patch)
tree      | 9996ca50172c3a9cb33026731c560de98b8f4cc7 /lib/Lex/Lexer.cpp
parent    | 2792fa5115c5de7cbe11d99d23663c569bfb4cae (diff)
Fix my earlier commit to work with escaped newlines and leave breadcrumbs
in case we want to make a world where we can check intermediate instantiations
for this kind of breadcrumb.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@127221 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Lex/Lexer.cpp')
-rw-r--r-- | lib/Lex/Lexer.cpp | 48
1 file changed, 48 insertions, 0 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 9643d8e978..b511421ee7 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -221,6 +221,54 @@ void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) {
 /// after trigraph expansion and escaped-newline folding. In particular, this
 /// wants to get the true, uncanonicalized, spelling of things like digraphs
 /// UCNs, etc.
+llvm::StringRef Lexer::getSpelling(SourceLocation loc,
+                                   llvm::SmallVectorImpl<char> &buffer,
+                                   const SourceManager &SM,
+                                   const LangOptions &options,
+                                   bool *invalid) {
+  // Break down the source location.
+  std::pair<FileID, unsigned> locInfo = SM.getDecomposedLoc(loc);
+
+  // Try to load the file buffer.
+  bool invalidTemp = false;
+  llvm::StringRef file = SM.getBufferData(locInfo.first, &invalidTemp);
+  if (invalidTemp) {
+    if (invalid) *invalid = true;
+    return llvm::StringRef();
+  }
+
+  const char *tokenBegin = file.data() + locInfo.second;
+
+  // Lex from the start of the given location.
+  Lexer lexer(SM.getLocForStartOfFile(locInfo.first), options,
+              file.begin(), tokenBegin, file.end());
+  Token token;
+  lexer.LexFromRawLexer(token);
+
+  unsigned length = token.getLength();
+
+  // Common case: no need for cleaning.
+  if (!token.needsCleaning())
+    return llvm::StringRef(tokenBegin, length);
+
+  // Hard case, we need to relex the characters into the string.
+  buffer.clear();
+  buffer.reserve(length);
+
+  for (const char *ti = tokenBegin, *te = ti + length; ti != te; ) {
+    unsigned charSize;
+    buffer.push_back(Lexer::getCharAndSizeNoWarn(ti, charSize, options));
+    ti += charSize;
+  }
+
+  return llvm::StringRef(buffer.data(), buffer.size());
+}
+
+/// getSpelling() - Return the 'spelling' of this token. The spelling of a
+/// token are the characters used to represent the token in the source file
+/// after trigraph expansion and escaped-newline folding. In particular, this
+/// wants to get the true, uncanonicalized, spelling of things like digraphs
+/// UCNs, etc.
 std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr,
                                const LangOptions &Features, bool *Invalid) {
   assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
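For context, a minimal sketch of how a caller might use the SourceLocation-based getSpelling overload added above. This sketch is not part of the commit: the helper name dumpSpellingAt is hypothetical, and the SourceManager, LangOptions, and location are assumed to come from surrounding compiler state.

#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical helper: print the raw spelling of the token at Loc without
// having a Token object in hand.
static void dumpSpellingAt(clang::SourceLocation Loc,
                           const clang::SourceManager &SM,
                           const clang::LangOptions &LangOpts) {
  llvm::SmallString<32> Buffer; // scratch used only on the "needs cleaning" path
  bool Invalid = false;
  llvm::StringRef Spelling =
      clang::Lexer::getSpelling(Loc, Buffer, SM, LangOpts, &Invalid);
  if (Invalid)
    return;
  // In the common case Spelling points straight into the file buffer; when
  // trigraphs or escaped newlines are involved it points into Buffer instead,
  // so Buffer must stay alive for as long as Spelling is used.
  llvm::errs() << "spelling: " << Spelling << "\n";
}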