diff options
Diffstat (limited to 'lib/Lex')
-rw-r--r-- | lib/Lex/Lexer.cpp | 31 | ||||
-rw-r--r-- | lib/Lex/LiteralSupport.cpp | 28 | ||||
-rw-r--r-- | lib/Lex/Preprocessor.cpp | 26 |
3 files changed, 65 insertions, 20 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 6cd1873e28..b4cafb49f8 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -548,6 +548,11 @@ static void InitCharacterInfo() {
   isInited = true;
 }
 
+/// isIdentifierStart - Return true if this is the start character of an
+/// identifier, which is [a-zA-Z_].
+static inline bool isIdentifierStart(unsigned char c) {
+  return (CharInfo[c] & (CHAR_LETTER|CHAR_UNDER)) ? true : false;
+}
 
 /// isIdentifierBody - Return true if this is the body character of an
 /// identifier, which is [a-zA-Z0-9_].
@@ -982,8 +987,30 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) {
 
   // Update the location of the token as well as the BufferPtr instance var.
   const char *TokStart = BufferPtr;
-  FormTokenWithChars(Result, CurPtr,
-                     Wide ? tok::wide_string_literal : tok::string_literal);
+  tok::TokenKind Kind = Wide ? tok::wide_string_literal : tok::string_literal;
+
+  // FIXME: Handle UCNs
+  unsigned Size;
+  if (PP && PP->getLangOptions().CPlusPlus0x &&
+      isIdentifierStart(getCharAndSize(CurPtr, Size))) {
+    Result.makeUserDefinedLiteral(ExtraDataAllocator);
+    Result.setFlagValue(Token::LiteralPortionClean, !Result.needsCleaning());
+    Result.setKind(Kind);
+    Result.setLiteralLength(CurPtr - BufferPtr);
+
+    // FIXME: We hack around the lexer's routines a lot here.
+    BufferPtr = CurPtr;
+    bool OldRawMode = LexingRawMode;
+    LexingRawMode = true;
+    LexIdentifier(Result, ConsumeChar(CurPtr, Size, Result));
+    LexingRawMode = OldRawMode;
+    PP->LookUpIdentifierInfo(Result, CurPtr);
+
+    CurPtr = BufferPtr;
+    BufferPtr = TokStart;
+  }
+
+  FormTokenWithChars(Result, CurPtr, Kind);
   Result.setLiteralData(TokStart);
 }
 
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
index a12c4ae0d4..eb7337a565 100644
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -758,30 +758,38 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
 ///
 StringLiteralParser::
 StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
-                    Preprocessor &pp, bool Complain) : PP(pp) {
+                    Preprocessor &pp, bool Complain) : PP(pp), hadError(false) {
   // Scan all of the string portions, remember the max individual token length,
   // computing a bound on the concatenated string length, and see whether any
   // piece is a wide-string.  If any of the string portions is a wide-string
   // literal, the result is a wide-string literal [C99 6.4.5p4].
-  MaxTokenLength = StringToks[0].getLength();
-  SizeBound = StringToks[0].getLength()-2;  // -2 for "".
+  MaxTokenLength = StringToks[0].getLiteralLength();
+  SizeBound = StringToks[0].getLiteralLength()-2;  // -2 for "".
   AnyWide = StringToks[0].is(tok::wide_string_literal);
-
-  hadError = false;
+  UDSuffix = StringToks[0].getIdentifierInfo();
 
   // Implement Translation Phase #6: concatenation of string literals
   /// (C99 5.1.1.2p1).  The common case is only one string fragment.
   for (unsigned i = 1; i != NumStringToks; ++i) {
     // The string could be shorter than this if it needs cleaning, but this is a
     // reasonable bound, which is all we need.
-    SizeBound += StringToks[i].getLength()-2;  // -2 for "".
+    SizeBound += StringToks[i].getLiteralLength()-2;  // -2 for "".
 
     // Remember maximum string piece length.
-    if (StringToks[i].getLength() > MaxTokenLength)
-      MaxTokenLength = StringToks[i].getLength();
+    if (StringToks[i].getLiteralLength() > MaxTokenLength)
+      MaxTokenLength = StringToks[i].getLiteralLength();
 
     // Remember if we see any wide strings.
     AnyWide |= StringToks[i].is(tok::wide_string_literal);
+
+    if (StringToks[i].isUserDefinedLiteral()) {
+      if (UDSuffix && UDSuffix != StringToks[i].getIdentifierInfo()) {
+        // FIXME: Improve location and note previous
+        PP.Diag(StringToks[0].getLocation(), diag::err_ud_suffix_mismatch);
+        hadError = true;
+      } else if (!UDSuffix)
+        UDSuffix = StringToks[i].getIdentifierInfo();  // first suffix seen
+    }
   }
 
   // Include space for the null terminator.
@@ -823,7 +831,7 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
     // and 'spelled' tokens can only shrink.
     bool StringInvalid = false;
     unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf,
-                                         &StringInvalid);
+                                         &StringInvalid, true);
     if (StringInvalid) {
       hadError = 1;
       continue;
@@ -938,7 +946,7 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
                                                     bool Complain) {
   // Get the spelling of the token.
   llvm::SmallString<16> SpellingBuffer;
-  SpellingBuffer.resize(Tok.getLength());
+  SpellingBuffer.resize(Tok.getLiteralLength());
 
   bool StringInvalid = false;
   const char *SpellingPtr = &SpellingBuffer[0];
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index 5160acf19e..f52d35494a 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -352,15 +352,25 @@ std::string Preprocessor::getSpelling(const Token &Tok, bool *Invalid) const {
 /// to point to a constant buffer with the data already in it (avoiding a
 /// copy).  The caller is not allowed to modify the returned buffer pointer
 /// if an internal buffer is returned.
-unsigned Preprocessor::getSpelling(const Token &Tok,
-                                   const char *&Buffer, bool *Invalid) const {
+///
+/// If LiteralOnly is specified, only the literal portion of the token is
+/// processed.
+unsigned Preprocessor::getSpelling(const Token &Tok, const char *&Buffer,
+                                   bool *Invalid, bool LiteralOnly) const {
   assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
+  assert((!LiteralOnly || Tok.isLiteral()) &&
+         "LiteralOnly used on a non-literal token");
+
+  unsigned (Token::*getLength) () const =
+    LiteralOnly ? &Token::getLiteralLength : &Token::getLength;
 
   // If this token is an identifier, just return the string from the identifier
   // table, which is very quick.
   if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
-    Buffer = II->getNameStart();
-    return II->getLength();
+    if (!Tok.isUserDefinedLiteral()) {
+      Buffer = II->getNameStart();
+      return II->getLength();
+    }
   }
 
   // Otherwise, compute the start of the token in the input lexer buffer.
@@ -381,20 +391,20 @@ unsigned Preprocessor::getSpelling(const Token &Tok,
   }
 
   // If this token contains nothing interesting, return it directly.
-  if (!Tok.needsCleaning()) {
+  if (!(LiteralOnly ? Tok.literalNeedsCleaning() : Tok.needsCleaning())) {
     Buffer = TokStart;
-    return Tok.getLength();
+    return (Tok.*getLength)();
   }
 
   // Otherwise, hard case, relex the characters into the string.
   char *OutBuf = const_cast<char*>(Buffer);
-  for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
+  for (const char *Ptr = TokStart, *End = TokStart+(Tok.*getLength)();
        Ptr != End; ) {
     unsigned CharSize;
     *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features);
     Ptr += CharSize;
   }
-  assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
+  assert(unsigned(OutBuf-Buffer) != (Tok.*getLength)() &&
          "NeedsCleaning flag set on something that didn't need cleaning!");
 
   return OutBuf-Buffer;