diff options
28 files changed, 375 insertions, 83 deletions
diff --git a/include/clang/AST/ExprCXX.h b/include/clang/AST/ExprCXX.h index 450348b9d2..1529b0ca13 100644 --- a/include/clang/AST/ExprCXX.h +++ b/include/clang/AST/ExprCXX.h @@ -2422,6 +2422,50 @@ public: virtual child_iterator child_end(); }; +/// UDLiteralExpr - An expression for a user-defined literal: a string, +/// character, integer or floating literal with a ud-suffix (e.g. "foo"_bar) +/// +/// Both the DeclRefExpr and the IntegerConstant are fictional expressions +/// generated from the literal. +class UDLiteralExpr : public CallExpr { + Expr *BaseLiteral; + + static bool isValidLiteral(Expr *E) { + return isa<StringLiteral>(E) || isa<FloatingLiteral>(E) || + isa<IntegerLiteral>(E) || isa<CharacterLiteral>(E); + } +public: + UDLiteralExpr(ASTContext &C, Expr *E, Expr *fn, Expr **args, + unsigned numargs, QualType t) + : CallExpr(C, UDLiteralExprClass, fn, args, numargs, t, SourceLocation()) + , BaseLiteral(E) { + assert(isValidLiteral(E) && "Base literal must be an actual literal"); + } + + FunctionDecl *getLiteralOperator() { return getDirectCallee(); } + const FunctionDecl *getLiteralOperator() const { return getDirectCallee(); } + + Expr *getBaseLiteral() { return BaseLiteral; } + const Expr *getBaseLiteral() const { return BaseLiteral; } + void setBaseLiteral(Expr *E) { + assert(isValidLiteral(E) && "Base literal must be an actual literal"); + BaseLiteral = E; + } + + IdentifierInfo *getUDSuffix() const { + return getLiteralOperator()->getDeclName().getCXXLiteralIdentifier(); + } + + virtual SourceRange getSourceRange() const { + return getBaseLiteral()->getSourceRange(); + } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == UDLiteralExprClass; + } + static bool classof(const UDLiteralExpr *) { return true; } +}; + inline ExplicitTemplateArgumentList &OverloadExpr::getExplicitTemplateArgs() { if (isa<UnresolvedLookupExpr>(this)) return cast<UnresolvedLookupExpr>(this)->getExplicitTemplateArgs(); diff --git a/include/clang/AST/RecursiveASTVisitor.h b/include/clang/AST/RecursiveASTVisitor.h 
index 79ac072fa1..a360cbd427 100644 --- a/include/clang/AST/RecursiveASTVisitor.h +++ b/include/clang/AST/RecursiveASTVisitor.h @@ -1694,6 +1694,7 @@ DEF_TRAVERSE_STMT(CXXTemporaryObjectExpr, { DEF_TRAVERSE_STMT(CallExpr, { }) DEF_TRAVERSE_STMT(CXXMemberCallExpr, { }) DEF_TRAVERSE_STMT(CXXOperatorCallExpr, { }) +DEF_TRAVERSE_STMT(UDLiteralExpr, { }) // These operators (all of them) do not need any action except // iterating over the children. diff --git a/include/clang/Basic/DiagnosticLexKinds.td b/include/clang/Basic/DiagnosticLexKinds.td index 07f53d9867..2896a2f89e 100644 --- a/include/clang/Basic/DiagnosticLexKinds.td +++ b/include/clang/Basic/DiagnosticLexKinds.td @@ -99,6 +99,9 @@ def ext_string_too_long : Extension<"string literal of length %0 exceeds " "maximum length %1 that %select{C90|ISO C99|C++}2 compilers are required to " "support">, InGroup<OverlengthStrings>; +def err_ud_suffix_mismatch : Error<"User-defined literal suffixes on adjacent " + "string literal tokens do not match">; + //===----------------------------------------------------------------------===// // PTH Diagnostics //===----------------------------------------------------------------------===// diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td index 32550dc616..e85ce4f483 100644 --- a/include/clang/Basic/DiagnosticSemaKinds.td +++ b/include/clang/Basic/DiagnosticSemaKinds.td @@ -2946,9 +2946,15 @@ def err_operator_delete_param_type : Error< // C++ literal operators def err_literal_operator_outside_namespace : Error< "literal operator %0 must be in a namespace or global scope">; -// FIXME: This diagnostic sucks +def warn_literal_operator_no_underscore : Warning< "literal operator names not " + "beginning with underscores are reserved for future standardization">; +def err_literal_operator_overload : Error< + "no matching literal operator function for user-defined suffix '%0'">; +def err_literal_operator_deleted : Error< + "deleted 
literal operator function for user-defined suffix '%0'">; +// FIXME: This should really provide information about what is allowed. def err_literal_operator_params : Error< - "parameter declaration for literal operator %0 is not valid">; + "parameter declaration for literal operator '%0' is not valid">; // C++ conversion functions def err_conv_function_not_member : Error< diff --git a/include/clang/Basic/StmtNodes.td b/include/clang/Basic/StmtNodes.td index a2f69730a0..2b446c674a 100644 --- a/include/clang/Basic/StmtNodes.td +++ b/include/clang/Basic/StmtNodes.td @@ -112,6 +112,9 @@ def OverloadExpr : DStmt<Expr, 1>; def UnresolvedLookupExpr : DStmt<OverloadExpr>; def UnresolvedMemberExpr : DStmt<OverloadExpr>; +// C++0x expressions (UDLiteralExpr is a CallExpr subclass) +def UDLiteralExpr : DStmt<CallExpr>; + // Obj-C Expressions. def ObjCStringLiteral : DStmt<Expr>; def ObjCEncodeExpr : DStmt<Expr>; diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h index 9e0fb7ee70..e05113da76 100644 --- a/include/clang/Lex/Lexer.h +++ b/include/clang/Lex/Lexer.h @@ -17,6 +17,7 @@ #include "clang/Lex/PreprocessorLexer.h" #include "clang/Basic/LangOptions.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Allocator.h" #include <string> #include <vector> #include <cassert> @@ -67,6 +68,9 @@ class Lexer : public PreprocessorLexer { // line" flag set on it. bool IsAtStartOfLine; + // ExtraDataAllocator - An allocator for extra data on a token. 
+ llvm::BumpPtrAllocator ExtraDataAllocator; + Lexer(const Lexer&); // DO NOT IMPLEMENT void operator=(const Lexer&); // DO NOT IMPLEMENT friend class Preprocessor; diff --git a/include/clang/Lex/LiteralSupport.h b/include/clang/Lex/LiteralSupport.h index ba46fb18a6..40112f5348 100644 --- a/include/clang/Lex/LiteralSupport.h +++ b/include/clang/Lex/LiteralSupport.h @@ -27,6 +27,7 @@ class Preprocessor; class Token; class SourceLocation; class TargetInfo; +class IdentifierInfo; /// NumericLiteralParser - This performs strict semantic analysis of the content /// of a ppnumber, classifying it as either integer, floating, or erroneous, @@ -145,6 +146,7 @@ class StringLiteralParser { unsigned wchar_tByteWidth; llvm::SmallString<512> ResultBuf; char *ResultPtr; // cursor + IdentifierInfo *UDSuffix; public: StringLiteralParser(const Token *StringToks, unsigned NumStringToks, Preprocessor &PP, bool Complain = true); @@ -155,6 +157,9 @@ public: const char *GetString() { return &ResultBuf[0]; } unsigned GetStringLength() const { return ResultPtr-&ResultBuf[0]; } + bool isUserDefinedLiteral() const { return UDSuffix; } + IdentifierInfo *getUDSuffix() const { return UDSuffix; } + unsigned GetNumStringChars() const { if (AnyWide) return GetStringLength() / wchar_tByteWidth; diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h index 6b9b89ea5e..94e858dfb0 100644 --- a/include/clang/Lex/Preprocessor.h +++ b/include/clang/Lex/Preprocessor.h @@ -645,7 +645,7 @@ public: /// copy). The caller is not allowed to modify the returned buffer pointer /// if an internal buffer is returned. unsigned getSpelling(const Token &Tok, const char *&Buffer, - bool *Invalid = 0) const; + bool *Invalid = 0, bool LiteralOnly = false) const; /// getSpelling - This method is used to get the spelling of a token into a /// SmallVector. 
Note that the returned StringRef may not point to the diff --git a/include/clang/Lex/Token.h b/include/clang/Lex/Token.h index bd9b46869a..e6f7c7c3f5 100644 --- a/include/clang/Lex/Token.h +++ b/include/clang/Lex/Token.h @@ -14,16 +14,16 @@ #ifndef LLVM_CLANG_TOKEN_H #define LLVM_CLANG_TOKEN_H +#include "llvm/Support/Allocator.h" #include "clang/Basic/TemplateKinds.h" #include "clang/Basic/TokenKinds.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/OperatorKinds.h" +#include "clang/Basic/IdentifierTable.h" #include <cstdlib> namespace clang { -class IdentifierInfo; - /// Token - This structure provides full information about a lexed token. /// It is not intended to be space efficient, it is intended to return as much /// information as possible about each returned token. This is expected to be @@ -34,6 +34,14 @@ class IdentifierInfo; /// can be represented by a single typename annotation token that carries /// information about the SourceRange of the tokens and the type object. class Token { + /// An extra-large structure for storing the data needed for a user-defined + /// literal - the raw literal, and the identifier suffix. + struct UDLData { + IdentifierInfo *II; + const char *LiteralData; + unsigned LiteralLength; + }; + /// The location of the token. SourceLocation Loc; @@ -47,7 +55,7 @@ class Token { /// token. unsigned UintData; - /// PtrData - This is a union of four different pointer types, which depends + /// PtrData - This is a union of five different pointer types, which depends /// on what type of token this is: /// Identifiers, keywords, etc: /// This is an IdentifierInfo*, which contains the uniqued identifier @@ -55,6 +63,8 @@ class Token { /// Literals: isLiteral() returns true. /// This is a pointer to the start of the token in a text buffer, which /// may be dirty (have trigraphs / escaped newlines). + /// User-defined literals: isUserDefinedLiteral() returns true. + /// This is a pointer to a UDLData. 
/// Annotations (resolved type names, C++ scopes, etc): isAnnotation(). /// This is a pointer to sema-specific data for the annotation token. /// Other: @@ -71,12 +81,14 @@ class Token { unsigned char Flags; public: - // Various flags set per token: + /// Various flags set per token: enum TokenFlags { - StartOfLine = 0x01, // At start of line or only after whitespace. - LeadingSpace = 0x02, // Whitespace exists before this token. - DisableExpand = 0x04, // This identifier may never be macro expanded. - NeedsCleaning = 0x08 // Contained an escaped newline or trigraph. + StartOfLine = 0x01, ///< At start of line or only after whitespace + LeadingSpace = 0x02, ///< Whitespace exists before this token + DisableExpand = 0x04, ///< This identifier may never be macro expanded + NeedsCleaning = 0x08, ///< Contained an escaped newline or trigraph + UserDefinedLiteral = 0x10, ///< This literal has a ud-suffix + LiteralPortionClean = 0x20 ///< A UDL's literal portion needs no cleaning }; tok::TokenKind getKind() const { return (tok::TokenKind)Kind; } @@ -108,12 +120,34 @@ public: assert(!isAnnotation() && "Annotation tokens have no length field"); return UintData; } + /// getLiteralLength - Return the length of the literal portion of the token, + /// which may not be the token length if this is a user-defined literal. 
+ unsigned getLiteralLength() const { + assert(isLiteral() && "Using getLiteralLength on a non-literal token"); + if (isUserDefinedLiteral()) + return reinterpret_cast<UDLData*>(PtrData)->LiteralLength; + else + return UintData; + } void setLocation(SourceLocation L) { Loc = L; } void setLength(unsigned Len) { assert(!isAnnotation() && "Annotation tokens have no length field"); UintData = Len; } + void setLiteralLength(unsigned Len) { + assert(isLiteral() && "Using setLiteralLength on a non-literal token"); + if (isUserDefinedLiteral()) + reinterpret_cast<UDLData*>(PtrData)->LiteralLength = Len; + else + UintData = Len; + } + + /// makeUserDefinedLiteral - Set this token to be a user-defined literal + void makeUserDefinedLiteral(llvm::BumpPtrAllocator &Alloc) { + PtrData = Alloc.Allocate<UDLData>(); + setFlag(UserDefinedLiteral); + } SourceLocation getAnnotationEndLoc() const { assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token"); @@ -154,11 +188,18 @@ public: IdentifierInfo *getIdentifierInfo() const { assert(!isAnnotation() && "Used IdentInfo on annotation token!"); - if (isLiteral()) return 0; - return (IdentifierInfo*) PtrData; + if (isUserDefinedLiteral()) + return reinterpret_cast<UDLData*>(PtrData)->II; + else if (isLiteral()) + return 0; + else + return reinterpret_cast<IdentifierInfo*>(PtrData); } void setIdentifierInfo(IdentifierInfo *II) { - PtrData = (void*) II; + if (isUserDefinedLiteral()) + reinterpret_cast<UDLData*>(PtrData)->II = II; + else + PtrData = (void*)II; } /// getLiteralData - For a literal token (numeric constant, string, etc), this @@ -166,11 +207,17 @@ public: /// otherwise. 
const char *getLiteralData() const { assert(isLiteral() && "Cannot get literal data of non-literal"); - return reinterpret_cast<const char*>(PtrData); + if (isUserDefinedLiteral()) + return reinterpret_cast<UDLData*>(PtrData)->LiteralData; + else + return reinterpret_cast<const char*>(PtrData); } void setLiteralData(const char *Ptr) { assert(isLiteral() && "Cannot set literal data of non-literal"); - PtrData = const_cast<char*>(Ptr); + if (isUserDefinedLiteral()) + reinterpret_cast<UDLData*>(PtrData)->LiteralData = Ptr; + else + PtrData = const_cast<char*>(Ptr); } void *getAnnotationValue() const { @@ -221,6 +268,12 @@ public: return (Flags & DisableExpand) ? true : false; } + /// isUserDefinedLiteral - Return true if this is a C++0x user-defined literal + /// token. + bool isUserDefinedLiteral() const { + return (Flags & UserDefinedLiteral) ? true : false; + } + /// isObjCAtKeyword - Return true if we have an ObjC keyword identifier. bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const; @@ -229,8 +282,17 @@ public: /// needsCleaning - Return true if this token has trigraphs or escaped /// newlines in it. - /// - bool needsCleaning() const { return (Flags & NeedsCleaning) ? true : false; } + bool needsCleaning() const { + return (Flags & NeedsCleaning) ? true : false; + } + + /// literalNeedsCleaning - Return true if the literal portion of this token + /// needs cleaning. + bool literalNeedsCleaning() const { + assert(isLiteral() && "Using literalNeedsCleaning on a non-literal token"); + return (Flags & NeedsCleaning) ? ((Flags & LiteralPortionClean) ? false : true) + : false; + } }; /// PPConditionalInfo - Information about the conditional stack (#if directives) diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h index 5558eae4da..d8c8b02499 100644 --- a/include/clang/Sema/Sema.h +++ b/include/clang/Sema/Sema.h @@ -1752,8 +1752,8 @@ public: /// ActOnStringLiteral - The specified tokens were lexed as pasted string /// fragments (e.g. 
"foo" "bar" L"baz"). - virtual ExprResult ActOnStringLiteral(const Token *Toks, - unsigned NumToks); + virtual ExprResult ActOnStringLiteral(Scope *S, const Token *Toks, + unsigned NumToks); // Binary/Unary Operators. 'Tok' is the token for the operator. ExprResult CreateBuiltinUnaryOp(SourceLocation OpLoc, @@ -2747,6 +2747,9 @@ public: bool CheckLiteralOperatorDeclaration(FunctionDecl *FnDecl); + ExprResult BuildUDStringLiteralExpr(Scope *S, StringLiteral *SL, unsigned L, + IdentifierInfo *II); + //===--------------------------------------------------------------------===// // C++ Templates [C++ 14] // diff --git a/lib/AST/ExprConstant.cpp b/lib/AST/ExprConstant.cpp index 14cbbaf0e8..d067668f4b 100644 --- a/lib/AST/ExprConstant.cpp +++ b/lib/AST/ExprConstant.cpp @@ -2423,7 +2423,8 @@ static ICEDiag CheckICE(const Expr* E, ASTContext &Ctx) { case Expr::UnaryTypeTraitExprClass: return NoDiag(); case Expr::CallExprClass: - case Expr::CXXOperatorCallExprClass: { + case Expr::CXXOperatorCallExprClass: + case Expr::UDLiteralExprClass: { const CallExpr *CE = cast<CallExpr>(E); if (CE->isBuiltinCall(Ctx)) return CheckEvalInICE(E, Ctx); diff --git a/lib/AST/StmtPrinter.cpp b/lib/AST/StmtPrinter.cpp index ea528c2dae..084bdb3ed6 100644 --- a/lib/AST/StmtPrinter.cpp +++ b/lib/AST/StmtPrinter.cpp @@ -1199,6 +1199,11 @@ void StmtPrinter::VisitUnresolvedMemberExpr(UnresolvedMemberExpr *Node) { } } +void StmtPrinter::VisitUDLiteralExpr(UDLiteralExpr *Node) { + PrintExpr(Node->getBaseLiteral()); + OS << Node->getUDSuffix()->getName(); +} + static const char *getTypeTraitName(UnaryTypeTrait UTT) { switch (UTT) { default: assert(false && "Unknown type trait"); diff --git a/lib/AST/StmtProfile.cpp b/lib/AST/StmtProfile.cpp index 098aec0a19..79454748dd 100644 --- a/lib/AST/StmtProfile.cpp +++ b/lib/AST/StmtProfile.cpp @@ -828,6 +828,12 @@ void StmtProfiler::VisitUnresolvedMemberExpr(UnresolvedMemberExpr *S) { VisitTemplateArguments(S->getTemplateArgs(), S->getNumTemplateArgs()); } 
+void StmtProfiler::VisitUDLiteralExpr(UDLiteralExpr *S) { + VisitExpr(S); + VisitStmt(S->getBaseLiteral()); + ID.AddString(S->getUDSuffix()->getName()); +} + void StmtProfiler::VisitObjCStringLiteral(ObjCStringLiteral *S) { VisitExpr(S); } diff --git a/lib/Checker/GRExprEngine.cpp b/lib/Checker/GRExprEngine.cpp index c9173aa92a..059749d16f 100644 --- a/lib/Checker/GRExprEngine.cpp +++ b/lib/Checker/GRExprEngine.cpp @@ -886,6 +886,7 @@ void GRExprEngine::Visit(const Stmt* S, ExplodedNode* Pred, } case Stmt::CallExprClass: + case Stmt::UDLiteralExprClass: case Stmt::CXXOperatorCallExprClass: { const CallExpr* C = cast<CallExpr>(S); VisitCall(C, Pred, C->arg_begin(), C->arg_end(), Dst, false); diff --git a/lib/CodeGen/Mangle.cpp b/lib/CodeGen/Mangle.cpp index c06b4fc699..51b07a1327 100644 --- a/lib/CodeGen/Mangle.cpp +++ b/lib/CodeGen/Mangle.cpp @@ -1634,6 +1634,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { break; case Expr::CXXMemberCallExprClass: // fallthrough + case Expr::UDLiteralExprClass: case Expr::CallExprClass: { const CallExpr *CE = cast<CallExpr>(E); Out << "cl"; diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 6cd1873e28..b4cafb49f8 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -548,6 +548,11 @@ static void InitCharacterInfo() { isInited = true; } +/// isIdentifierStart - Return true if this is the start character of an +/// identifier, which is [a-zA-Z_]. +static inline bool isIdentifierStart(unsigned char c) { + return (CharInfo[c] & (CHAR_LETTER|CHAR_UNDER)) ? true : false; +} /// isIdentifierBody - Return true if this is the body character of an /// identifier, which is [a-zA-Z0-9_]. @@ -982,8 +987,30 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) { // Update the location of the token as well as the BufferPtr instance var. const char *TokStart = BufferPtr; - FormTokenWithChars(Result, CurPtr, - Wide ? 
tok::wide_string_literal : tok::string_literal); + tok::TokenKind Kind = Wide ? tok::wide_string_literal : tok::string_literal; + + // FIXME: Handle UCNs + unsigned Size; + if (PP && PP->getLangOptions().CPlusPlus0x && + isIdentifierStart(getCharAndSize(CurPtr, Size))) { + Result.makeUserDefinedLiteral(ExtraDataAllocator); + Result.setFlagValue(Token::LiteralPortionClean, !Result.needsCleaning()); + Result.setKind(Kind); + Result.setLiteralLength(CurPtr - BufferPtr); + + // FIXME: We hack around the lexer's routines a lot here. + BufferPtr = CurPtr; + bool OldRawMode = LexingRawMode; + LexingRawMode = true; + LexIdentifier(Result, ConsumeChar(CurPtr, Size, Result)); + LexingRawMode = OldRawMode; + PP->LookUpIdentifierInfo(Result, CurPtr); + + CurPtr = BufferPtr; + BufferPtr = TokStart; + } + + FormTokenWithChars(Result, CurPtr, Kind); Result.setLiteralData(TokStart); } diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp index a12c4ae0d4..eb7337a565 100644 --- a/lib/Lex/LiteralSupport.cpp +++ b/lib/Lex/LiteralSupport.cpp @@ -758,30 +758,38 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, /// StringLiteralParser:: StringLiteralParser(const Token *StringToks, unsigned NumStringToks, - Preprocessor &pp, bool Complain) : PP(pp) { + Preprocessor &pp, bool Complain) : PP(pp), hadError(false) { // Scan all of the string portions, remember the max individual token length, // computing a bound on the concatenated string length, and see whether any // piece is a wide-string. If any of the string portions is a wide-string // literal, the result is a wide-string literal [C99 6.4.5p4]. - MaxTokenLength = StringToks[0].getLength(); - SizeBound = StringToks[0].getLength()-2; // -2 for "". + MaxTokenLength = StringToks[0].getLiteralLength(); + SizeBound = StringToks[0].getLiteralLength()-2; // -2 for "". 
AnyWide = StringToks[0].is(tok::wide_string_literal); - - hadError = false; + UDSuffix = StringToks[0].getIdentifierInfo(); // Implement Translation Phase #6: concatenation of string literals /// (C99 5.1.1.2p1). The common case is only one string fragment. for (unsigned i = 1; i != NumStringToks; ++i) { // The string could be shorter than this if it needs cleaning, but this is a // reasonable bound, which is all we need. - SizeBound += StringToks[i].getLength()-2; // -2 for "". + SizeBound += StringToks[i].getLiteralLength()-2; // -2 for "". // Remember maximum string piece length. - if (StringToks[i].getLength() > MaxTokenLength) - MaxTokenLength = StringToks[i].getLength(); + if (StringToks[i].getLiteralLength() > MaxTokenLength) + MaxTokenLength = StringToks[i].getLiteralLength(); // Remember if we see any wide strings. AnyWide |= StringToks[i].is(tok::wide_string_literal); + + if (StringToks[i].isUserDefinedLiteral()) { + if (UDSuffix && UDSuffix != StringToks[i].getIdentifierInfo()) { + // FIXME: Improve location and note previous + PP.Diag(StringToks[0].getLocation(), diag::err_ud_suffix_mismatch); + hadError = true; + } else if (!UDSuffix) + UDSuffix = StringToks[i].getIdentifierInfo(); + } } // Include space for the null terminator. @@ -823,7 +831,7 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks, // and 'spelled' tokens can only shrink. bool StringInvalid = false; unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf, - &StringInvalid); + &StringInvalid, true); if (StringInvalid) { hadError = 1; continue; @@ -938,7 +946,7 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok, bool Complain) { // Get the spelling of the token. 
llvm::SmallString<16> SpellingBuffer; - SpellingBuffer.resize(Tok.getLength()); + SpellingBuffer.resize(Tok.getLength()); // NOTE(review): keep full-token size; the getSpelling() call after this hunk is unchanged and presumably spells the whole token - confirm bool StringInvalid = false; const char *SpellingPtr = &SpellingBuffer[0]; diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp index 5160acf19e..f52d35494a 100644 --- a/lib/Lex/Preprocessor.cpp +++ b/lib/Lex/Preprocessor.cpp @@ -352,15 +352,25 @@ std::string Preprocessor::getSpelling(const Token &Tok, bool *Invalid) const { /// to point to a constant buffer with the data already in it (avoiding a /// copy). The caller is not allowed to modify the returned buffer pointer /// if an internal buffer is returned. -unsigned Preprocessor::getSpelling(const Token &Tok, - const char *&Buffer, bool *Invalid) const { +/// +/// If LiteralOnly is specified, only the literal portion of the token is +/// processed. +unsigned Preprocessor::getSpelling(const Token &Tok, const char *&Buffer, + bool *Invalid, bool LiteralOnly) const { assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); + assert((!LiteralOnly || Tok.isLiteral()) && + "LiteralOnly used on a non-literal token"); + + unsigned (Token::*getLength) () const = + LiteralOnly ? &Token::getLiteralLength : &Token::getLength; // If this token is an identifier, just return the string from the identifier // table, which is very quick. if (const IdentifierInfo *II = Tok.getIdentifierInfo()) { - Buffer = II->getNameStart(); - return II->getLength(); + if (!Tok.isUserDefinedLiteral()) { + Buffer = II->getNameStart(); + return II->getLength(); + } } // Otherwise, compute the start of the token in the input lexer buffer. @@ -381,20 +391,20 @@ unsigned Preprocessor::getSpelling(const Token &Tok, } // If this token contains nothing interesting, return it directly. - if (!Tok.needsCleaning()) { + if (!(LiteralOnly ? 
Tok.literalNeedsCleaning() : Tok.needsCleaning())) { Buffer = TokStart; - return Tok.getLength(); + return (Tok.*getLength)(); } // Otherwise, hard case, relex the characters into the string. char *OutBuf = const_cast<char*>(Buffer); - for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); + for (const char *Ptr = TokStart, *End = TokStart+(Tok.*getLength)(); Ptr != End; ) { unsigned CharSize; *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features); Ptr += CharSize; } - assert(unsigned(OutBuf-Buffer) != Tok.getLength() && + assert(unsigned(OutBuf-Buffer) != (Tok.*getLength)() && "NeedsCleaning flag set on something that didn't need cleaning!"); return OutBuf-Buffer; diff --git a/lib/Parse/ParseExpr.cpp b/lib/Parse/ParseExpr.cpp index 290b72c4c0..2ade77a907 100644 --- a/lib/Parse/ParseExpr.cpp +++ b/lib/Parse/ParseExpr.cpp @@ -1568,7 +1568,8 @@ ExprResult Parser::ParseStringLiteralExpression() { } while (isTokenStringLiteral()); // Pass the set of string tokens, ready for concatenation, to the actions. - return Actions.ActOnStringLiteral(&StringToks[0], StringToks.size()); + return Actions.ActOnStringLiteral(getCurScope(), &StringToks[0], + StringToks.size()); } /// ParseExpressionList - Used for C/C++ (argument-)expression-list. diff --git a/lib/Parse/ParseStmt.cpp b/lib/Parse/ParseStmt.cpp index af927285a4..94b8c3baac 100644 --- a/lib/Parse/ParseStmt.cpp +++ b/lib/Parse/ParseStmt.cpp @@ -1244,11 +1244,12 @@ StmtResult Parser::FuzzyParseMicrosoftAsmStatement() { Tok.isNot(tok::eof)); } Token t; + t.startToken(); t.setKind(tok::string_literal); t.setLiteralData("\"/*FIXME: not done*/\""); t.clearFlag(Token::NeedsCleaning); t.setLength(21); - ExprResult AsmString(Actions.ActOnStringLiteral(&t, 1)); + ExprResult AsmString(Actions.ActOnStringLiteral(getCurScope(), &t, 1)); ExprVector Constraints(Actions); ExprVector Exprs(Actions); |