aboutsummaryrefslogtreecommitdiff
path: root/lib/Lex
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Lex')
-rw-r--r--lib/Lex/Lexer.cpp31
-rw-r--r--lib/Lex/LiteralSupport.cpp28
-rw-r--r--lib/Lex/Preprocessor.cpp26
3 files changed, 65 insertions, 20 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 6cd1873e28..b4cafb49f8 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -548,6 +548,11 @@ static void InitCharacterInfo() {
isInited = true;
}
+/// isIdentifierStart - Return true if this is the start character of an
+/// identifier, which is [a-zA-Z_].
+static inline bool isIdentifierStart(unsigned char c) {
+ return (CharInfo[c] & (CHAR_LETTER|CHAR_UNDER)) ? true : false;
+}
/// isIdentifierBody - Return true if this is the body character of an
/// identifier, which is [a-zA-Z0-9_].
@@ -982,8 +987,30 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) {
// Update the location of the token as well as the BufferPtr instance var.
const char *TokStart = BufferPtr;
- FormTokenWithChars(Result, CurPtr,
- Wide ? tok::wide_string_literal : tok::string_literal);
+ tok::TokenKind Kind = Wide ? tok::wide_string_literal : tok::string_literal;
+
+ // FIXME: Handle UCNs
+ unsigned Size;
+ if (PP && PP->getLangOptions().CPlusPlus0x &&
+ isIdentifierStart(getCharAndSize(CurPtr, Size))) {
+ Result.makeUserDefinedLiteral(ExtraDataAllocator);
+ Result.setFlagValue(Token::LiteralPortionClean, !Result.needsCleaning());
+ Result.setKind(Kind);
+ Result.setLiteralLength(CurPtr - BufferPtr);
+
+ // FIXME: We hack around the lexer's routines a lot here.
+ BufferPtr = CurPtr;
+ bool OldRawMode = LexingRawMode;
+ LexingRawMode = true;
+ LexIdentifier(Result, ConsumeChar(CurPtr, Size, Result));
+ LexingRawMode = OldRawMode;
+ PP->LookUpIdentifierInfo(Result, CurPtr);
+
+ CurPtr = BufferPtr;
+ BufferPtr = TokStart;
+ }
+
+ FormTokenWithChars(Result, CurPtr, Kind);
Result.setLiteralData(TokStart);
}
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
index a12c4ae0d4..eb7337a565 100644
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -758,30 +758,38 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
///
StringLiteralParser::
StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
- Preprocessor &pp, bool Complain) : PP(pp) {
+ Preprocessor &pp, bool Complain) : PP(pp), hadError(false) {
// Scan all of the string portions, remember the max individual token length,
// computing a bound on the concatenated string length, and see whether any
// piece is a wide-string. If any of the string portions is a wide-string
// literal, the result is a wide-string literal [C99 6.4.5p4].
- MaxTokenLength = StringToks[0].getLength();
- SizeBound = StringToks[0].getLength()-2; // -2 for "".
+ MaxTokenLength = StringToks[0].getLiteralLength();
+ SizeBound = StringToks[0].getLiteralLength()-2; // -2 for "".
AnyWide = StringToks[0].is(tok::wide_string_literal);
-
- hadError = false;
+ UDSuffix = StringToks[0].getIdentifierInfo();
// Implement Translation Phase #6: concatenation of string literals
/// (C99 5.1.1.2p1). The common case is only one string fragment.
for (unsigned i = 1; i != NumStringToks; ++i) {
// The string could be shorter than this if it needs cleaning, but this is a
// reasonable bound, which is all we need.
- SizeBound += StringToks[i].getLength()-2; // -2 for "".
+ SizeBound += StringToks[i].getLiteralLength()-2; // -2 for "".
// Remember maximum string piece length.
- if (StringToks[i].getLength() > MaxTokenLength)
- MaxTokenLength = StringToks[i].getLength();
+ if (StringToks[i].getLiteralLength() > MaxTokenLength)
+ MaxTokenLength = StringToks[i].getLiteralLength();
// Remember if we see any wide strings.
AnyWide |= StringToks[i].is(tok::wide_string_literal);
+
+ if (StringToks[i].isUserDefinedLiteral()) {
+ if (UDSuffix && UDSuffix != StringToks[i].getIdentifierInfo()) {
+ // FIXME: Improve location and note previous
+ PP.Diag(StringToks[0].getLocation(), diag::err_ud_suffix_mismatch);
+ hadError = true;
+ } else if (!UDSuffix)
+ UDSuffix = StringToks[i].getIdentifierInfo();
+ }
}
// Include space for the null terminator.
@@ -823,7 +831,7 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
// and 'spelled' tokens can only shrink.
bool StringInvalid = false;
unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf,
- &StringInvalid);
+ &StringInvalid, true);
if (StringInvalid) {
hadError = 1;
continue;
@@ -938,7 +946,7 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
bool Complain) {
// Get the spelling of the token.
llvm::SmallString<16> SpellingBuffer;
- SpellingBuffer.resize(Tok.getLength());
+ SpellingBuffer.resize(Tok.getLiteralLength());
bool StringInvalid = false;
const char *SpellingPtr = &SpellingBuffer[0];
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index 5160acf19e..f52d35494a 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -352,15 +352,25 @@ std::string Preprocessor::getSpelling(const Token &Tok, bool *Invalid) const {
/// to point to a constant buffer with the data already in it (avoiding a
/// copy). The caller is not allowed to modify the returned buffer pointer
/// if an internal buffer is returned.
-unsigned Preprocessor::getSpelling(const Token &Tok,
- const char *&Buffer, bool *Invalid) const {
+///
+/// If LiteralOnly is specified, only the literal portion of the token is
+/// processed.
+unsigned Preprocessor::getSpelling(const Token &Tok, const char *&Buffer,
+ bool *Invalid, bool LiteralOnly) const {
assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
+ assert((!LiteralOnly || Tok.isLiteral()) &&
+ "LiteralOnly used on a non-literal token");
+
+ unsigned (Token::*getLength) () const =
+ LiteralOnly ? &Token::getLiteralLength : &Token::getLength;
// If this token is an identifier, just return the string from the identifier
// table, which is very quick.
if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
- Buffer = II->getNameStart();
- return II->getLength();
+ if (!Tok.isUserDefinedLiteral()) {
+ Buffer = II->getNameStart();
+ return II->getLength();
+ }
}
// Otherwise, compute the start of the token in the input lexer buffer.
@@ -381,20 +391,20 @@ unsigned Preprocessor::getSpelling(const Token &Tok,
}
// If this token contains nothing interesting, return it directly.
- if (!Tok.needsCleaning()) {
+ if (!(LiteralOnly ? Tok.literalNeedsCleaning() : Tok.needsCleaning())) {
Buffer = TokStart;
- return Tok.getLength();
+ return (Tok.*getLength)();
}
// Otherwise, hard case, relex the characters into the string.
char *OutBuf = const_cast<char*>(Buffer);
- for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
+ for (const char *Ptr = TokStart, *End = TokStart+(Tok.*getLength)();
Ptr != End; ) {
unsigned CharSize;
*OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features);
Ptr += CharSize;
}
- assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
+ assert(unsigned(OutBuf-Buffer) != (Tok.*getLength)() &&
"NeedsCleaning flag set on something that didn't need cleaning!");
return OutBuf-Buffer;