author    | John McCall <rjmccall@apple.com> | 2011-03-08 07:59:04 +0000
committer | John McCall <rjmccall@apple.com> | 2011-03-08 07:59:04 +0000
commit    | 834e3f6c77d9ac03997a3f0c56934edcf406a355 (patch)
tree      | 9996ca50172c3a9cb33026731c560de98b8f4cc7 /lib/Lex/Lexer.cpp
parent    | 2792fa5115c5de7cbe11d99d23663c569bfb4cae (diff)
Fix my earlier commit to work with escaped newlines and leave breadcrumbs
in case we want to make a world where we can check intermediate instantiations
for this kind of breadcrumb.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@127221 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Lex/Lexer.cpp')
-rw-r--r-- | lib/Lex/Lexer.cpp | 48
1 file changed, 48 insertions, 0 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 9643d8e978..b511421ee7 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -221,6 +221,54 @@ void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) {
 /// after trigraph expansion and escaped-newline folding. In particular, this
 /// wants to get the true, uncanonicalized, spelling of things like digraphs
 /// UCNs, etc.
+llvm::StringRef Lexer::getSpelling(SourceLocation loc,
+                                   llvm::SmallVectorImpl<char> &buffer,
+                                   const SourceManager &SM,
+                                   const LangOptions &options,
+                                   bool *invalid) {
+  // Break down the source location.
+  std::pair<FileID, unsigned> locInfo = SM.getDecomposedLoc(loc);
+
+  // Try to load the file buffer.
+  bool invalidTemp = false;
+  llvm::StringRef file = SM.getBufferData(locInfo.first, &invalidTemp);
+  if (invalidTemp) {
+    if (invalid) *invalid = true;
+    return llvm::StringRef();
+  }
+
+  const char *tokenBegin = file.data() + locInfo.second;
+
+  // Lex from the start of the given location.
+  Lexer lexer(SM.getLocForStartOfFile(locInfo.first), options,
+              file.begin(), tokenBegin, file.end());
+  Token token;
+  lexer.LexFromRawLexer(token);
+
+  unsigned length = token.getLength();
+
+  // Common case: no need for cleaning.
+  if (!token.needsCleaning())
+    return llvm::StringRef(tokenBegin, length);
+
+  // Hard case, we need to relex the characters into the string.
+  buffer.clear();
+  buffer.reserve(length);
+
+  for (const char *ti = tokenBegin, *te = ti + length; ti != te; ) {
+    unsigned charSize;
+    buffer.push_back(Lexer::getCharAndSizeNoWarn(ti, charSize, options));
+    ti += charSize;
+  }
+
+  return llvm::StringRef(buffer.data(), buffer.size());
+}
+
+/// getSpelling() - Return the 'spelling' of this token. The spelling of a
+/// token are the characters used to represent the token in the source file
+/// after trigraph expansion and escaped-newline folding. In particular, this
+/// wants to get the true, uncanonicalized, spelling of things like digraphs
+/// UCNs, etc.
 std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr,
                                const LangOptions &Features, bool *Invalid) {
   assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
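For context, a minimal sketch of how a caller might use the SourceLocation-based getSpelling overload added above. This sketch is not part of the commit: the helper name dumpSpellingAt is hypothetical, and the SourceManager, LangOptions, and location are assumed to come from surrounding compiler state.

#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical helper: print the raw spelling of the token at Loc without
// having a Token object in hand.
static void dumpSpellingAt(clang::SourceLocation Loc,
                           const clang::SourceManager &SM,
                           const clang::LangOptions &LangOpts) {
  llvm::SmallString<32> Buffer; // scratch used only on the "needs cleaning" path
  bool Invalid = false;
  llvm::StringRef Spelling =
      clang::Lexer::getSpelling(Loc, Buffer, SM, LangOpts, &Invalid);
  if (Invalid)
    return;
  // In the common case Spelling points straight into the file buffer; when
  // trigraphs or escaped newlines are involved it points into Buffer instead,
  // so Buffer must stay alive for as long as Spelling is used.
  llvm::errs() << "spelling: " << Spelling << "\n";
}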