aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/clang/AST/Expr.h52
-rw-r--r--include/clang/AST/Type.h2
-rw-r--r--include/clang/Basic/DiagnosticLexKinds.td6
-rw-r--r--include/clang/Basic/IdentifierTable.h7
-rw-r--r--include/clang/Basic/TokenKinds.def12
-rw-r--r--include/clang/Lex/Lexer.h6
-rw-r--r--include/clang/Lex/LiteralSupport.h31
-rw-r--r--include/clang/Lex/Token.h5
-rw-r--r--include/clang/Lex/TokenConcatenation.h9
-rw-r--r--include/clang/Parse/Parser.h5
-rw-r--r--lib/AST/ASTImporter.cpp4
-rw-r--r--lib/AST/Expr.cpp9
-rw-r--r--lib/AST/StmtDumper.cpp9
-rw-r--r--lib/AST/StmtPrinter.cpp18
-rw-r--r--lib/AST/StmtProfile.cpp4
-rw-r--r--lib/AST/Type.cpp12
-rw-r--r--lib/CodeGen/CodeGenModule.cpp16
-rw-r--r--lib/Lex/Lexer.cpp77
-rw-r--r--lib/Lex/LiteralSupport.cpp157
-rw-r--r--lib/Lex/MacroArgs.cpp8
-rw-r--r--lib/Lex/PPDirectives.cpp4
-rw-r--r--lib/Lex/PPExpressions.cpp16
-rw-r--r--lib/Lex/Pragma.cpp6
-rw-r--r--lib/Lex/TokenConcatenation.cpp64
-rw-r--r--lib/Parse/ParseCXXInlineMethods.cpp3
-rw-r--r--lib/Parse/ParseExpr.cpp6
-rw-r--r--lib/Parse/ParseTentative.cpp6
-rw-r--r--lib/Parse/Parser.cpp3
-rw-r--r--lib/Rewrite/HTMLRewrite.cpp9
-rw-r--r--lib/Rewrite/RewriteObjC.cpp19
-rw-r--r--lib/Sema/SemaChecking.cpp4
-rw-r--r--lib/Sema/SemaDeclAttr.cpp12
-rw-r--r--lib/Sema/SemaExpr.cpp39
-rw-r--r--lib/Sema/SemaExprCXX.cpp20
-rw-r--r--lib/Sema/SemaExprObjC.cpp6
-rw-r--r--lib/Sema/SemaInit.cpp32
-rw-r--r--lib/Sema/SemaStmt.cpp8
-rw-r--r--lib/Sema/SemaTemplate.cpp18
-rw-r--r--lib/Serialization/ASTReaderStmt.cpp4
-rw-r--r--lib/Serialization/ASTWriterStmt.cpp4
-rw-r--r--test/CXX/lex/lex.literal/lex.ccon/p1.cpp7
-rw-r--r--test/CodeGen/char-literal.c44
-rw-r--r--test/CodeGen/string-literal.c23
-rw-r--r--test/Lexer/wchar.c4
-rw-r--r--test/Parser/char-literal-printing.c37
-rw-r--r--test/SemaCXX/type-convert-construct.cpp7
46 files changed, 608 insertions, 246 deletions
diff --git a/include/clang/AST/Expr.h b/include/clang/AST/Expr.h
index f623fd1d52..9e4c0f0915 100644
--- a/include/clang/AST/Expr.h
+++ b/include/clang/AST/Expr.h
@@ -1112,29 +1112,39 @@ public:
};
class CharacterLiteral : public Expr {
+public:
+ enum CharacterKind {
+ Ascii,
+ Wide,
+ UTF16,
+ UTF32
+ };
+
+private:
unsigned Value;
SourceLocation Loc;
- bool IsWide;
+ unsigned Kind : 2;
public:
// type should be IntTy
- CharacterLiteral(unsigned value, bool iswide, QualType type, SourceLocation l)
+ CharacterLiteral(unsigned value, CharacterKind kind, QualType type,
+ SourceLocation l)
: Expr(CharacterLiteralClass, type, VK_RValue, OK_Ordinary, false, false,
false, false),
- Value(value), Loc(l), IsWide(iswide) {
+ Value(value), Loc(l), Kind(kind) {
}
/// \brief Construct an empty character literal.
CharacterLiteral(EmptyShell Empty) : Expr(CharacterLiteralClass, Empty) { }
SourceLocation getLocation() const { return Loc; }
- bool isWide() const { return IsWide; }
+ CharacterKind getKind() const { return static_cast<CharacterKind>(Kind); }
SourceRange getSourceRange() const { return SourceRange(Loc); }
unsigned getValue() const { return Value; }
void setLocation(SourceLocation Location) { Loc = Location; }
- void setWide(bool W) { IsWide = W; }
+ void setKind(CharacterKind kind) { Kind = kind; }
void setValue(unsigned Val) { Value = Val; }
static bool classof(const Stmt *T) {
@@ -1243,13 +1253,23 @@ public:
/// In this case, getByteLength() will return 6, but the string literal will
/// have type "char[2]".
class StringLiteral : public Expr {
+public:
+ enum StringKind {
+ Ascii,
+ Wide,
+ UTF8,
+ UTF16,
+ UTF32
+ };
+
+private:
friend class ASTStmtReader;
const char *StrData;
unsigned ByteLength;
- bool IsWide;
- bool IsPascal;
unsigned NumConcatenated;
+ unsigned Kind : 3;
+ bool IsPascal : 1;
SourceLocation TokLocs[1];
StringLiteral(QualType Ty) :
@@ -1259,14 +1279,15 @@ class StringLiteral : public Expr {
public:
/// This is the "fully general" constructor that allows representation of
/// strings formed from multiple concatenated tokens.
- static StringLiteral *Create(ASTContext &C, StringRef Str, bool Wide,
+ static StringLiteral *Create(ASTContext &C, StringRef Str, StringKind Kind,
bool Pascal, QualType Ty,
const SourceLocation *Loc, unsigned NumStrs);
/// Simple constructor for string literals made from one token.
- static StringLiteral *Create(ASTContext &C, StringRef Str, bool Wide,
- bool Pascal, QualType Ty, SourceLocation Loc) {
- return Create(C, Str, Wide, Pascal, Ty, &Loc, 1);
+ static StringLiteral *Create(ASTContext &C, StringRef Str, StringKind Kind,
+ bool Pascal, QualType Ty,
+ SourceLocation Loc) {
+ return Create(C, Str, Kind, Pascal, Ty, &Loc, 1);
}
/// \brief Construct an empty string literal.
@@ -1281,9 +1302,14 @@ public:
/// \brief Sets the string data to the given string data.
void setString(ASTContext &C, StringRef Str);
- bool isWide() const { return IsWide; }
+ StringKind getKind() const { return static_cast<StringKind>(Kind); }
+ bool isAscii() const { return Kind == Ascii; }
+ bool isWide() const { return Kind == Wide; }
+ bool isUTF8() const { return Kind == UTF8; }
+ bool isUTF16() const { return Kind == UTF16; }
+ bool isUTF32() const { return Kind == UTF32; }
bool isPascal() const { return IsPascal; }
-
+
bool containsNonAsciiOrNull() const {
StringRef Str = getString();
for (unsigned i = 0, e = Str.size(); i != e; ++i)
diff --git a/include/clang/AST/Type.h b/include/clang/AST/Type.h
index 8a842da440..2b72610226 100644
--- a/include/clang/AST/Type.h
+++ b/include/clang/AST/Type.h
@@ -1368,6 +1368,8 @@ public:
bool isBooleanType() const;
bool isCharType() const;
bool isWideCharType() const;
+ bool isChar16Type() const;
+ bool isChar32Type() const;
bool isAnyCharacterType() const;
bool isIntegralType(ASTContext &Ctx) const;
diff --git a/include/clang/Basic/DiagnosticLexKinds.td b/include/clang/Basic/DiagnosticLexKinds.td
index 9e431a2d21..e23921be0b 100644
--- a/include/clang/Basic/DiagnosticLexKinds.td
+++ b/include/clang/Basic/DiagnosticLexKinds.td
@@ -77,8 +77,8 @@ def err_invalid_suffix_integer_constant : Error<
"invalid suffix '%0' on integer constant">;
def err_invalid_suffix_float_constant : Error<
"invalid suffix '%0' on floating constant">;
-def warn_extraneous_wide_char_constant : Warning<
- "extraneous characters in wide character constant ignored">;
+def warn_extraneous_char_constant : Warning<
+ "extraneous characters in character constant ignored">;
def warn_char_constant_too_large : Warning<
"character constant too long for its type">;
def err_exponent_has_no_digits : Error<"exponent has no digits">;
@@ -102,6 +102,8 @@ def warn_ucn_escape_too_large : ExtWarn<
"character unicode escape sequence too long for its type">;
def warn_ucn_not_valid_in_c89 : ExtWarn<
"unicode escape sequences are only valid in C99 or C++">;
+def err_unsupported_string_concat : Error<
+ "unsupported non-standard concatenation of string literals">;
//===----------------------------------------------------------------------===//
// PTH Diagnostics
diff --git a/include/clang/Basic/IdentifierTable.h b/include/clang/Basic/IdentifierTable.h
index be1fa196c0..3390f7809d 100644
--- a/include/clang/Basic/IdentifierTable.h
+++ b/include/clang/Basic/IdentifierTable.h
@@ -50,8 +50,8 @@ namespace clang {
/// set, and all tok::identifier tokens have a pointer to one of these.
class IdentifierInfo {
// Note: DON'T make TokenID a 'tok::TokenKind'; MSVC will treat it as a
- // signed char and TokenKinds > 127 won't be handled correctly.
- unsigned TokenID : 8; // Front-end token ID or tok::identifier.
+ // signed char and TokenKinds > 255 won't be handled correctly.
+ unsigned TokenID : 9; // Front-end token ID or tok::identifier.
// Objective-C keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
// First NUM_OBJC_KEYWORDS values are for Objective-C, the remaining values
// are for builtins.
@@ -65,7 +65,7 @@ class IdentifierInfo {
// file and wasn't modified since.
bool RevertedTokenID : 1; // True if RevertTokenIDToIdentifier was
// called.
- // 6 bits left in 32-bit word.
+ // 5 bits left in 32-bit word.
void *FETokenInfo; // Managed by the language front-end.
llvm::StringMapEntry<IdentifierInfo*> *Entry;
@@ -409,6 +409,7 @@ public:
IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
IdentifierInfo &II = get(Name);
II.TokenID = TokenCode;
+ assert(II.TokenID == TokenCode && "TokenCode too large");
return II;
}
diff --git a/include/clang/Basic/TokenKinds.def b/include/clang/Basic/TokenKinds.def
index 86172b83ff..d057559889 100644
--- a/include/clang/Basic/TokenKinds.def
+++ b/include/clang/Basic/TokenKinds.def
@@ -114,13 +114,23 @@ TOK(raw_identifier) // Used only in raw lexing mode.
TOK(numeric_constant) // 0x123
// C99 6.4.4: Character Constants
-TOK(char_constant) // 'a' L'b'
+TOK(char_constant) // 'a'
+TOK(wide_char_constant) // L'b'
+
+// C++0x Character Constants
+TOK(utf16_char_constant) // u'a'
+TOK(utf32_char_constant) // U'a'
// C99 6.4.5: String Literals.
TOK(string_literal) // "foo"
TOK(wide_string_literal) // L"foo"
TOK(angle_string_literal)// <foo>
+// C++0x String Literals.
+TOK(utf8_string_literal) // u8"foo"
+TOK(utf16_string_literal)// u"foo"
+TOK(utf32_string_literal)// U"foo"
+
// C99 6.4.6: Punctuators.
PUNCTUATOR(l_square, "[")
PUNCTUATOR(r_square, "]")
diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h
index 990c1eedbb..2c25597433 100644
--- a/include/clang/Lex/Lexer.h
+++ b/include/clang/Lex/Lexer.h
@@ -471,9 +471,11 @@ private:
// Helper functions to lex the remainder of a token of the specific type.
void LexIdentifier (Token &Result, const char *CurPtr);
void LexNumericConstant (Token &Result, const char *CurPtr);
- void LexStringLiteral (Token &Result, const char *CurPtr,bool Wide);
+ void LexStringLiteral (Token &Result, const char *CurPtr,
+ tok::TokenKind Kind);
void LexAngledStringLiteral(Token &Result, const char *CurPtr);
- void LexCharConstant (Token &Result, const char *CurPtr);
+ void LexCharConstant (Token &Result, const char *CurPtr,
+ tok::TokenKind Kind);
bool LexEndOfFile (Token &Result, const char *CurPtr);
bool SkipWhitespace (Token &Result, const char *CurPtr);
diff --git a/include/clang/Lex/LiteralSupport.h b/include/clang/Lex/LiteralSupport.h
index 6486c38a40..15057299b2 100644
--- a/include/clang/Lex/LiteralSupport.h
+++ b/include/clang/Lex/LiteralSupport.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/DataTypes.h"
+#include "clang/Basic/TokenKinds.h"
#include <cctype>
namespace clang {
@@ -124,15 +125,19 @@ private:
/// character literal.
class CharLiteralParser {
uint64_t Value;
- bool IsWide;
+ tok::TokenKind Kind;
bool IsMultiChar;
bool HadError;
public:
CharLiteralParser(const char *begin, const char *end,
- SourceLocation Loc, Preprocessor &PP);
+ SourceLocation Loc, Preprocessor &PP,
+ tok::TokenKind kind);
bool hadError() const { return HadError; }
- bool isWide() const { return IsWide; }
+ bool isAscii() const { return Kind == tok::char_constant; }
+ bool isWide() const { return Kind == tok::wide_char_constant; }
+ bool isUTF16() const { return Kind == tok::utf16_char_constant; }
+ bool isUTF32() const { return Kind == tok::utf32_char_constant; }
bool isMultiChar() const { return IsMultiChar; }
uint64_t getValue() const { return Value; }
};
@@ -148,7 +153,8 @@ class StringLiteralParser {
unsigned MaxTokenLength;
unsigned SizeBound;
- unsigned wchar_tByteWidth;
+ unsigned CharByteWidth;
+ tok::TokenKind Kind;
llvm::SmallString<512> ResultBuf;
char *ResultPtr; // cursor
public:
@@ -158,14 +164,13 @@ public:
const SourceManager &sm, const LangOptions &features,
const TargetInfo &target, Diagnostic *diags = 0)
: SM(sm), Features(features), Target(target), Diags(diags),
- MaxTokenLength(0), SizeBound(0), wchar_tByteWidth(0),
- ResultPtr(ResultBuf.data()), hadError(false), AnyWide(false), Pascal(false) {
+ MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
+ ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
init(StringToks, NumStringToks);
}
bool hadError;
- bool AnyWide;
bool Pascal;
StringRef GetString() const {
@@ -174,9 +179,7 @@ public:
unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); }
unsigned GetNumStringChars() const {
- if (AnyWide)
- return GetStringLength() / wchar_tByteWidth;
- return GetStringLength();
+ return GetStringLength() / CharByteWidth;
}
/// getOffsetOfStringByte - This function returns the offset of the
/// specified byte of the string data represented by Token. This handles
@@ -185,7 +188,13 @@ public:
/// If the Diagnostics pointer is non-null, then this will do semantic
/// checking of the string literal and emit errors and warnings.
unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const;
-
+
+ bool isAscii() { return Kind == tok::string_literal; }
+ bool isWide() { return Kind == tok::wide_string_literal; }
+ bool isUTF8() { return Kind == tok::utf8_string_literal; }
+ bool isUTF16() { return Kind == tok::utf16_string_literal; }
+ bool isUTF32() { return Kind == tok::utf32_string_literal; }
+
private:
void init(const Token *StringToks, unsigned NumStringToks);
};
diff --git a/include/clang/Lex/Token.h b/include/clang/Lex/Token.h
index 9cf11d9a64..e6dd1607e8 100644
--- a/include/clang/Lex/Token.h
+++ b/include/clang/Lex/Token.h
@@ -96,7 +96,10 @@ public:
/// constant, string, etc.
bool isLiteral() const {
return is(tok::numeric_constant) || is(tok::char_constant) ||
- is(tok::string_literal) || is(tok::wide_string_literal) ||
+ is(tok::wide_char_constant) || is(tok::utf16_char_constant) ||
+ is(tok::utf32_char_constant) || is(tok::string_literal) ||
+ is(tok::wide_string_literal) || is(tok::utf8_string_literal) ||
+ is(tok::utf16_string_literal) || is(tok::utf32_string_literal) ||
is(tok::angle_string_literal);
}
diff --git a/include/clang/Lex/TokenConcatenation.h b/include/clang/Lex/TokenConcatenation.h
index 094990a6e3..551300f402 100644
--- a/include/clang/Lex/TokenConcatenation.h
+++ b/include/clang/Lex/TokenConcatenation.h
@@ -63,12 +63,9 @@ namespace clang {
const Token &Tok) const;
private:
- /// StartsWithL - Return true if the spelling of this token starts with 'L'.
- bool StartsWithL(const Token &Tok) const;
-
- /// IsIdentifierL - Return true if the spelling of this token is literally
- /// 'L'.
- bool IsIdentifierL(const Token &Tok) const;
+ /// IsIdentifierStringPrefix - Return true if the spelling of the token
+ /// is literally 'L', 'u', 'U', or 'u8'.
+ bool IsIdentifierStringPrefix(const Token &Tok) const;
};
} // end clang namespace
diff --git a/include/clang/Parse/Parser.h b/include/clang/Parse/Parser.h
index 5d9376c1f7..83b0cd455e 100644
--- a/include/clang/Parse/Parser.h
+++ b/include/clang/Parse/Parser.h
@@ -265,7 +265,10 @@ private:
///
bool isTokenStringLiteral() const {
return Tok.getKind() == tok::string_literal ||
- Tok.getKind() == tok::wide_string_literal;
+ Tok.getKind() == tok::wide_string_literal ||
+ Tok.getKind() == tok::utf8_string_literal ||
+ Tok.getKind() == tok::utf16_string_literal ||
+ Tok.getKind() == tok::utf32_string_literal;
}
/// \brief Returns true if the current token is a '=' or '==' and
diff --git a/lib/AST/ASTImporter.cpp b/lib/AST/ASTImporter.cpp
index 2ea79912d1..d6e7d77d0f 100644
--- a/lib/AST/ASTImporter.cpp
+++ b/lib/AST/ASTImporter.cpp
@@ -3814,8 +3814,8 @@ Expr *ASTNodeImporter::VisitCharacterLiteral(CharacterLiteral *E) {
if (T.isNull())
return 0;
- return new (Importer.getToContext()) CharacterLiteral(E->getValue(),
- E->isWide(), T,
+ return new (Importer.getToContext()) CharacterLiteral(E->getValue(),
+ E->getKind(), T,
Importer.Import(E->getLocation()));
}
diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp
index 58fb32d278..5e795be56d 100644
--- a/lib/AST/Expr.cpp
+++ b/lib/AST/Expr.cpp
@@ -533,8 +533,7 @@ double FloatingLiteral::getValueAsApproximateDouble() const {
}
StringLiteral *StringLiteral::Create(ASTContext &C, StringRef Str,
- bool Wide,
- bool Pascal, QualType Ty,
+ StringKind Kind, bool Pascal, QualType Ty,
const SourceLocation *Loc,
unsigned NumStrs) {
// Allocate enough space for the StringLiteral plus an array of locations for
@@ -549,7 +548,7 @@ StringLiteral *StringLiteral::Create(ASTContext &C, StringRef Str,
memcpy(AStrData, Str.data(), Str.size());
SL->StrData = AStrData;
SL->ByteLength = Str.size();
- SL->IsWide = Wide;
+ SL->Kind = Kind;
SL->IsPascal = Pascal;
SL->TokLocs[0] = Loc[0];
SL->NumConcatenated = NumStrs;
@@ -587,8 +586,8 @@ void StringLiteral::setString(ASTContext &C, StringRef Str) {
SourceLocation StringLiteral::
getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
const LangOptions &Features, const TargetInfo &Target) const {
- assert(!isWide() && "This doesn't work for wide strings yet");
-
+ assert(Kind == StringLiteral::Ascii && "This only works for ASCII strings");
+
// Loop over all of the tokens in this string until we find the one that
// contains the byte we're looking for.
unsigned TokNo = 0;
diff --git a/lib/AST/StmtDumper.cpp b/lib/AST/StmtDumper.cpp
index 7218af570f..ce4ae8e773 100644
--- a/lib/AST/StmtDumper.cpp
+++ b/lib/AST/StmtDumper.cpp
@@ -443,8 +443,13 @@ void StmtDumper::VisitStringLiteral(StringLiteral *Str) {
DumpExpr(Str);
// FIXME: this doesn't print wstrings right.
OS << " ";
- if (Str->isWide())
- OS << "L";
+ switch (Str->getKind()) {
+ case StringLiteral::Ascii: break; // No prefix
+ case StringLiteral::Wide: OS << 'L'; break;
+ case StringLiteral::UTF8: OS << "u8"; break;
+ case StringLiteral::UTF16: OS << 'u'; break;
+ case StringLiteral::UTF32: OS << 'U'; break;
+ }
OS << '"';
OS.write_escaped(Str->getString());
OS << '"';
diff --git a/lib/AST/StmtPrinter.cpp b/lib/AST/StmtPrinter.cpp
index 8fcad14ec2..79f14bc658 100644
--- a/lib/AST/StmtPrinter.cpp
+++ b/lib/AST/StmtPrinter.cpp
@@ -599,8 +599,14 @@ void StmtPrinter::VisitPredefinedExpr(PredefinedExpr *Node) {
void StmtPrinter::VisitCharacterLiteral(CharacterLiteral *Node) {
unsigned value = Node->getValue();
- if (Node->isWide())
- OS << "L";
+
+ switch (Node->getKind()) {
+ case CharacterLiteral::Ascii: break; // no prefix.
+ case CharacterLiteral::Wide: OS << 'L'; break;
+ case CharacterLiteral::UTF16: OS << 'u'; break;
+ case CharacterLiteral::UTF32: OS << 'U'; break;
+ }
+
switch (value) {
case '\\':
OS << "'\\\\'";
@@ -672,7 +678,13 @@ void StmtPrinter::VisitImaginaryLiteral(ImaginaryLiteral *Node) {
}
void StmtPrinter::VisitStringLiteral(StringLiteral *Str) {
- if (Str->isWide()) OS << 'L';
+ switch (Str->getKind()) {
+ case StringLiteral::Ascii: break; // no prefix.
+ case StringLiteral::Wide: OS << 'L'; break;
+ case StringLiteral::UTF8: OS << "u8"; break;
+ case StringLiteral::UTF16: OS << 'u'; break;
+ case StringLiteral::UTF32: OS << 'U'; break;
+ }
OS << '"';
// FIXME: this doesn't print wstrings right.
diff --git a/lib/AST/StmtProfile.cpp b/lib/AST/StmtProfile.cpp
index 120c9e50a9..12321ef0d6 100644
--- a/lib/AST/StmtProfile.cpp
+++ b/lib/AST/StmtProfile.cpp
@@ -252,7 +252,7 @@ void StmtProfiler::VisitIntegerLiteral(const IntegerLiteral *S) {
void StmtProfiler::VisitCharacterLiteral(const CharacterLiteral *S) {
VisitExpr(S);
- ID.AddBoolean(S->isWide());
+ ID.AddInteger(S->getKind());
ID.AddInteger(S->getValue());
}
@@ -269,7 +269,7 @@ void StmtProfiler::VisitImaginaryLiteral(const ImaginaryLiteral *S) {
void StmtProfiler::VisitStringLiteral(const StringLiteral *S) {
VisitExpr(S);
ID.AddString(S->getString());
- ID.AddBoolean(S->isWide());
+ ID.AddInteger(S->getKind());
}
void StmtProfiler::VisitParenExpr(const ParenExpr *S) {
diff --git a/lib/AST/Type.cpp b/lib/AST/Type.cpp
index 7cd3be2fb4..2555ab31fb 100644
--- a/lib/AST/Type.cpp
+++ b/lib/AST/Type.cpp
@@ -635,6 +635,18 @@ bool Type::isWideCharType() const {
return false;
}
+bool Type::isChar16Type() const {
+ if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType))
+ return BT->getKind() == BuiltinType::Char16;
+ return false;
+}
+
+bool Type::isChar32Type() const {
+ if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType))
+ return BT->getKind() == BuiltinType::Char32;
+ return false;
+}
+
/// \brief Determine whether this type is any of the built-in character
/// types.
bool Type::isAnyCharacterType() const {
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index 290fe242c9..ce32325aca 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -1877,8 +1877,20 @@ std::string CodeGenModule::GetStringForStringLiteral(const StringLiteral *E) {
// Resize the string to the right size.
uint64_t RealLen = CAT->getSize().getZExtValue();
- if (E->isWide())
+ switch (E->getKind()) {
+ case StringLiteral::Ascii:
+ case StringLiteral::UTF8:
+ break;
+ case StringLiteral::Wide:
RealLen *= Context.Target.getWCharWidth() / Context.getCharWidth();
+ break;
+ case StringLiteral::UTF16:
+ RealLen *= Context.Target.getChar16Width() / Context.getCharWidth();
+ break;
+ case StringLiteral::UTF32:
+ RealLen *= Context.Target.getChar32Width() / Context.getCharWidth();
+ break;
+ }
std::string Str = E->getString().str();
Str.resize(RealLen, '\0');
@@ -1893,7 +1905,7 @@ CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S) {
// FIXME: This can be more efficient.
// FIXME: We shouldn't need to bitcast the constant in the wide string case.
llvm::Constant *C = GetAddrOfConstantString(GetStringForStringLiteral(S));
- if (S->isWide()) {
+ if (S->isWide() || S->isUTF16() || S->isUTF32()) {
llvm::Type *DestTy =
llvm::PointerType::getUnqual(getTypes().ConvertType(S->getType()));
C = llvm::ConstantExpr::getBitCast(C, DestTy);
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 6c7169f89b..44674a93d7 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -1267,8 +1267,9 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
}
/// LexStringLiteral - Lex the remainder of a string literal, after having lexed
-/// either " or L".
-void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) {
+/// either " or L" or u8" or u" or U".
+void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
+ tok::TokenKind Kind) {
const char *NulCharacter = 0; // Does this string contain the \0 character?
char C = getAndAdvanceChar(CurPtr, Result);
@@ -1299,8 +1300,7 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) {
// Update the location of the token as well as the BufferPtr instance var.
const char *TokStart = BufferPtr;
- FormTokenWithChars(Result, CurPtr,
- Wide ? tok::wide_string_literal : tok::string_literal);
+ FormTokenWithChars(Result, CurPtr, Kind);
Result.setLiteralData(TokStart);
}
@@ -1339,8 +1339,9 @@ void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
/// LexCharConstant - Lex the remainder of a character constant, after having
-/// lexed either ' or L'.
-void Lexer::LexCharConstant(Token &Result, const char *CurPtr) {
+/// lexed either ' or L' or u' or U'.
+void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
+ tok::TokenKind Kind) {
const char *NulCharacter = 0; // Does this character contain the \0 character?
char C = getAndAdvanceChar(CurPtr, Result);
@@ -1377,7 +1378,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr) {
// Update the location of token as well as BufferPtr.
co