aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Smith <richard-llvm@metafoo.co.uk>2012-11-28 07:29:00 +0000
committerRichard Smith <richard-llvm@metafoo.co.uk>2012-11-28 07:29:00 +0000
commit30cddaec99fa6c3207613efdaedbb51dd8d70c77 (patch)
tree8dfd8b004454cf98e66f2a77f5738d8d906755cb
parentacf796b4797c5b3e9e237148fa622afdc04b3eff (diff)
Teach Lexer::getSpelling about raw string literals. Specifically, if a raw
string literal needs cleaning (because it contains line-splicing in the encoding prefix or in the ud-suffix), do not clean the section between the double-quotes -- that's the "raw" bit! git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@168776 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Lex/Lexer.cpp109
-rw-r--r--test/CXX/lex/lex.literal/lex.ext/p5.cpp7
-rw-r--r--test/CodeGen/string-literal.c7
3 files changed, 81 insertions, 42 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 4698e288c0..6cd18469e4 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -233,16 +233,67 @@ void Lexer::Stringify(SmallVectorImpl<char> &Str) {
// Token Spelling
//===----------------------------------------------------------------------===//
+/// \brief Slow case of getSpelling. Extract the characters comprising the
+/// spelling of this token from the provided input buffer.
+static size_t getSpellingSlow(const Token &Tok, const char *BufPtr,
+ const LangOptions &LangOpts, char *Spelling) {
+ assert(Tok.needsCleaning() && "getSpellingSlow called on simple token");
+
+ size_t Length = 0;
+ const char *BufEnd = BufPtr + Tok.getLength();
+
+ if (Tok.is(tok::string_literal)) {
+ // Munch the encoding-prefix and opening double-quote.
+ while (BufPtr < BufEnd) {
+ unsigned Size;
+ Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
+ BufPtr += Size;
+
+ if (Spelling[Length - 1] == '"')
+ break;
+ }
+
+ // Raw string literals need special handling; trigraph expansion and line
+ // splicing do not occur within their d-char-sequence nor within their
+ // r-char-sequence.
+ if (Length >= 2 &&
+ Spelling[Length - 2] == 'R' && Spelling[Length - 1] == '"') {
+ // Search backwards from the end of the token to find the matching closing
+ // quote.
+ const char *RawEnd = BufEnd;
+ do --RawEnd; while (*RawEnd != '"');
+ size_t RawLength = RawEnd - BufPtr + 1;
+
+ // Everything between the quotes is included verbatim in the spelling.
+ memcpy(Spelling + Length, BufPtr, RawLength);
+ Length += RawLength;
+ BufPtr += RawLength;
+
+ // The rest of the token is lexed normally.
+ }
+ }
+
+ while (BufPtr < BufEnd) {
+ unsigned Size;
+ Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
+ BufPtr += Size;
+ }
+
+ assert(Length < Tok.getLength() &&
+ "NeedsCleaning flag set on token that didn't need cleaning!");
+ return Length;
+}
+
/// getSpelling() - Return the 'spelling' of this token. The spelling of a
/// token are the characters used to represent the token in the source file
/// after trigraph expansion and escaped-newline folding. In particular, this
/// wants to get the true, uncanonicalized, spelling of things like digraphs
/// UCNs, etc.
StringRef Lexer::getSpelling(SourceLocation loc,
- SmallVectorImpl<char> &buffer,
- const SourceManager &SM,
- const LangOptions &options,
- bool *invalid) {
+ SmallVectorImpl<char> &buffer,
+ const SourceManager &SM,
+ const LangOptions &options,
+ bool *invalid) {
// Break down the source location.
std::pair<FileID, unsigned> locInfo = SM.getDecomposedLoc(loc);
@@ -267,17 +318,10 @@ StringRef Lexer::getSpelling(SourceLocation loc,
// Common case: no need for cleaning.
if (!token.needsCleaning())
return StringRef(tokenBegin, length);
-
- // Hard case, we need to relex the characters into the string.
- buffer.clear();
- buffer.reserve(length);
-
- for (const char *ti = tokenBegin, *te = ti + length; ti != te; ) {
- unsigned charSize;
- buffer.push_back(Lexer::getCharAndSizeNoWarn(ti, charSize, options));
- ti += charSize;
- }
+ // Hard case, we need to relex the characters into the string.
+ buffer.resize(length);
+ buffer.resize(getSpellingSlow(token, tokenBegin, options, buffer.data()));
return StringRef(buffer.data(), buffer.size());
}
@@ -289,31 +333,22 @@ StringRef Lexer::getSpelling(SourceLocation loc,
std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr,
const LangOptions &LangOpts, bool *Invalid) {
assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
-
- // If this token contains nothing interesting, return it directly.
+
bool CharDataInvalid = false;
- const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation(),
+ const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation(),
&CharDataInvalid);
if (Invalid)
*Invalid = CharDataInvalid;
if (CharDataInvalid)
return std::string();
-
+
+ // If this token contains nothing interesting, return it directly.
if (!Tok.needsCleaning())
- return std::string(TokStart, TokStart+Tok.getLength());
-
+ return std::string(TokStart, TokStart + Tok.getLength());
+
std::string Result;
- Result.reserve(Tok.getLength());
-
- // Otherwise, hard case, relex the characters into the string.
- for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
- Ptr != End; ) {
- unsigned CharSize;
- Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, LangOpts));
- Ptr += CharSize;
- }
- assert(Result.size() != unsigned(Tok.getLength()) &&
- "NeedsCleaning flag set on something that didn't need cleaning!");
+ Result.resize(Tok.getLength());
+ Result.resize(getSpellingSlow(Tok, TokStart, LangOpts, &*Result.begin()));
return Result;
}
@@ -365,17 +400,7 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
}
// Otherwise, hard case, relex the characters into the string.
- char *OutBuf = const_cast<char*>(Buffer);
- for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
- Ptr != End; ) {
- unsigned CharSize;
- *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, LangOpts);
- Ptr += CharSize;
- }
- assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
- "NeedsCleaning flag set on something that didn't need cleaning!");
-
- return OutBuf-Buffer;
+ return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer));
}
diff --git a/test/CXX/lex/lex.literal/lex.ext/p5.cpp b/test/CXX/lex/lex.literal/lex.ext/p5.cpp
index 4655aa17dc..06c091d8ac 100644
--- a/test/CXX/lex/lex.literal/lex.ext/p5.cpp
+++ b/test/CXX/lex/lex.literal/lex.ext/p5.cpp
@@ -11,3 +11,10 @@ double &i3 = L"foo"_x1; // expected-error {{no matching literal operator}}
char &operator "" _x1(const wchar_t *, size_t);
char &i4 = L"foo"_x1; // ok
double &i5 = R"(foo)"_x1; // ok
+double &i6 = u\
+8\
+R\
+"(foo)"\
+_\
+x\
+1; // ok
diff --git a/test/CodeGen/string-literal.c b/test/CodeGen/string-literal.c
index 12d431a454..962b19d3dd 100644
--- a/test/CodeGen/string-literal.c
+++ b/test/CodeGen/string-literal.c
@@ -76,5 +76,12 @@ def)";
const char *q = R"(abc
def)" "ghi";
+ // CHECK-CPP0X: private unnamed_addr constant [13 x i8] c"abc\5C\0A??=\0Adef\00", align 1
+ const char *r = R\
+"(abc\
+??=
+def)";
+
+
#endif
}