move getSpelling from Preprocessor to Lexer, which it is more conceptually related to.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@119479 91177308-0d34-0410-b5e6-96231b3b80d8
author: Chris Lattner <sabre@nondot.org> 2010-11-17 07:26:20 +0000
committer: Chris Lattner <sabre@nondot.org> 2010-11-17 07:26:20 +0000
commit: b0607279cb98bbf2bbfe0db170aed39ef91e86a2 (patch)
tree: 69ceb8b0789f7c17c16d67617d67c966ef367cc5 /lib/Lex/Lexer.cpp
parent: 75072f2093995eb7ae0c0fa03bd439bbe8429d97 (diff)
1 files changed, 101 insertions, 0 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 3e68875768..da68495663 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -212,6 +212,107 @@ void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) {
   }
 }
 
+//===----------------------------------------------------------------------===//
+// Token Spelling
+//===----------------------------------------------------------------------===//
+
+/// getSpelling() - Return the 'spelling' of this token: the characters used to
+/// represent it in the source file after trigraph expansion and escaped-newline
+/// folding, i.e. the true, uncanonicalized spelling of things like digraphs,
+/// UCNs, etc.  If Invalid is non-null it receives the invalid flag reported by
+/// SourceMgr.getCharacterData(); when it is set, an empty string is returned.
+std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr,
+                               const LangOptions &Features, bool *Invalid) {
+  assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
+  
+  // Look up the character data at the token's location.
+  bool CharDataInvalid = false;
+  const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation(), 
+                                                    &CharDataInvalid);
+  if (Invalid)
+    *Invalid = CharDataInvalid;
+  if (CharDataInvalid)
+    return std::string();
+  // If this token contains nothing interesting, return it directly.
+  if (!Tok.needsCleaning())
+    return std::string(TokStart, TokStart+Tok.getLength());
+  
+  std::string Result;
+  Result.reserve(Tok.getLength());
+  
+  // Otherwise, hard case: relex the token one character at a time into Result.
+  for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
+       Ptr != End; ) {
+    unsigned CharSize;
+    Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features));
+    Ptr += CharSize;
+  }
+  assert(Result.size() != unsigned(Tok.getLength()) &&
+         "NeedsCleaning flag set on something that didn't need cleaning!");
+  return Result;
+}
+
+/// getSpelling - Get the spelling of a token into a preallocated buffer rather
+/// than an std::string.  The caller must supply a Buffer of at least
+/// Tok.getLength() bytes.  The length of the spelling is returned.  Invalid,
+/// if non-null, is written only on the path that queries SourceMgr; when that
+/// query reports invalid data, Buffer is set to an empty string and 0 returned.
+/// Note that this method may do two possible things: it may either fill in
+/// the buffer specified with characters, or it may *change the input pointer*
+/// to point to a constant buffer with the data already in it (avoiding a
+/// copy).  The caller must not write through the returned pointer when an
+/// internal buffer is returned.
+unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, 
+                            const SourceManager &SourceMgr,
+                            const LangOptions &Features, bool *Invalid) {
+  assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
+  
+  // If this token has an IdentifierInfo, just point Buffer at the name stored
+  // in it, which is very quick (no copy is made and Invalid is not touched).
+  if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
+    Buffer = II->getNameStart();
+    return II->getLength();
+  }
+  
+  // Otherwise, compute the start of the token in the input lexer buffer.
+  const char *TokStart = 0;
+  // Literal tokens may carry a pointer to their own data; use it if present.
+  if (Tok.isLiteral())
+    TokStart = Tok.getLiteralData();
+  // No literal data available: ask SourceMgr for the characters instead.
+  if (TokStart == 0) {
+    bool CharDataInvalid = false;
+    TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid);
+    if (Invalid)
+      *Invalid = CharDataInvalid;
+    if (CharDataInvalid) {
+      Buffer = "";
+      return 0;
+    }
+  }
+  
+  // If the token needs no cleaning, hand back its start pointer without copying.
+  if (!Tok.needsCleaning()) {
+    Buffer = TokStart;
+    return Tok.getLength();
+  }
+  
+  // Otherwise, hard case: relex the characters into the caller-provided Buffer.
+  char *OutBuf = const_cast<char*>(Buffer);
+  for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
+       Ptr != End; ) {
+    unsigned CharSize;
+    *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features);
+    Ptr += CharSize;
+  }
+  assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
+         "NeedsCleaning flag set on something that didn't need cleaning!");
+  // The result is the number of characters written into Buffer.
+  return OutBuf-Buffer;
+}
+
+
+
 static bool isWhitespace(unsigned char c);
 
 /// MeasureTokenLength - Relex the token at the specified location and return
author	Chris Lattner <sabre@nondot.org>	2010-11-17 07:26:20 +0000
committer	Chris Lattner <sabre@nondot.org>	2010-11-17 07:26:20 +0000
commit	b0607279cb98bbf2bbfe0db170aed39ef91e86a2 (patch)
tree	69ceb8b0789f7c17c16d67617d67c966ef367cc5 /lib/Lex/Lexer.cpp
parent	75072f2093995eb7ae0c0fa03bd439bbe8429d97 (diff)