diff options
-rw-r--r-- | lib/AST/StmtPrinter.cpp | 51 |
-rw-r--r-- | test/SemaCXX/constexpr-printing.cpp | 4 |
-rw-r--r-- | test/SemaCXX/static-assert.cpp | 7 |
3 files changed, 55 insertions, 7 deletions
diff --git a/lib/AST/StmtPrinter.cpp b/lib/AST/StmtPrinter.cpp index 7ebc1299f0..ef5eefb306 100644 --- a/lib/AST/StmtPrinter.cpp +++ b/lib/AST/StmtPrinter.cpp @@ -727,12 +727,40 @@ void StmtPrinter::VisitStringLiteral(StringLiteral *Str) { OS << '"'; static char Hex[] = "0123456789ABCDEF"; + unsigned LastSlashX = Str->getLength(); for (unsigned I = 0, N = Str->getLength(); I != N; ++I) { switch (uint32_t Char = Str->getCodeUnit(I)) { default: - // FIXME: Is this the best way to print wchar_t? + // FIXME: Convert UTF-8 back to codepoints before rendering. + + // Convert UTF-16 surrogate pairs back to codepoints before rendering. + // Leave invalid surrogates alone; we'll use \x for those. + if (Str->getKind() == StringLiteral::UTF16 && I != N - 1 && + Char >= 0xd800 && Char <= 0xdbff) { + uint32_t Trail = Str->getCodeUnit(I + 1); + if (Trail >= 0xdc00 && Trail <= 0xdfff) { + Char = 0x10000 + ((Char - 0xd800) << 10) + (Trail - 0xdc00); + ++I; + } + } + if (Char > 0xff) { - assert(Char <= 0x10ffff && "invalid unicode codepoint"); + // If this is a wide string, output characters over 0xff using \x + // escapes. Otherwise, this is a UTF-16 or UTF-32 string, and Char is a + // codepoint: use \x escapes for invalid codepoints. + if (Str->getKind() == StringLiteral::Wide || + (Char >= 0xd800 && Char <= 0xdfff) || Char >= 0x110000) { + // FIXME: Is this the best way to print wchar_t? + OS << "\\x"; + int Shift = 28; + while ((Char >> Shift) == 0) + Shift -= 4; + for (/**/; Shift >= 0; Shift -= 4) + OS << Hex[(Char >> Shift) & 15]; + LastSlashX = I; + break; + } + if (Char > 0xffff) OS << "\\U00" << Hex[(Char >> 20) & 15] @@ -745,13 +773,26 @@ void StmtPrinter::VisitStringLiteral(StringLiteral *Str) { << Hex[(Char >> 0) & 15]; break; } + + // If we used \x... for the previous character, and this character is a + // hexadecimal digit, prevent it being slurped as part of the \x. 
+ if (LastSlashX + 1 == I) { + switch (Char) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + OS << "\"\""; + } + } + if (Char <= 0xff && isprint(Char)) OS << (char)Char; else // Output anything hard as an octal escape. OS << '\\' - << (char)('0'+ ((Char >> 6) & 7)) - << (char)('0'+ ((Char >> 3) & 7)) - << (char)('0'+ ((Char >> 0) & 7)); + << (char)('0' + ((Char >> 6) & 7)) + << (char)('0' + ((Char >> 3) & 7)) + << (char)('0' + ((Char >> 0) & 7)); break; // Handle some common non-printable cases to make dumps prettier. case '\\': OS << "\\\\"; break; diff --git a/test/SemaCXX/constexpr-printing.cpp b/test/SemaCXX/constexpr-printing.cpp index 4e5bc429db..fc0cce25eb 100644 --- a/test/SemaCXX/constexpr-printing.cpp +++ b/test/SemaCXX/constexpr-printing.cpp @@ -85,8 +85,8 @@ constexpr char16_t c16 = get(u"test\0\\\"\t\a\b\234\u1234"); // \ expected-error {{}} expected-note {{u"test\000\\\"\t\a\b\234\u1234"}} constexpr char32_t c32 = get(U"test\0\\\"\t\a\b\234\u1234\U0010ffff"); // \ expected-error {{}} expected-note {{U"test\000\\\"\t\a\b\234\u1234\U0010FFFF"}} -constexpr wchar_t wc = get(L"test\0\\\"\t\a\b\234\u1234"); // \ - expected-error {{}} expected-note {{L"test\000\\\"\t\a\b\234\u1234"}} +constexpr wchar_t wc = get(L"test\0\\\"\t\a\b\234\u1234\xffffffff"); // \ + expected-error {{}} expected-note {{L"test\000\\\"\t\a\b\234\x1234\xFFFFFFFF"}} constexpr char32_t c32_err = get(U"\U00110000"); // expected-error {{invalid universal character}} diff --git a/test/SemaCXX/static-assert.cpp b/test/SemaCXX/static-assert.cpp index 2b44e81d3e..68ef0183e2 100644 --- a/test/SemaCXX/static-assert.cpp +++ b/test/SemaCXX/static-assert.cpp @@ -27,3 +27,10 @@ template<typename T> struct S { S<char> s1; // expected-note {{in instantiation of template class 'S<char>' requested here}} S<int> s2; + 
+static_assert(false, L"\xFFFFFFFF"); // expected-error {{static_assert failed L"\xFFFFFFFF"}} +static_assert(false, u"\U000317FF"); // expected-error {{static_assert failed u"\U000317FF"}} +// FIXME: render this as u8"\u03A9" +static_assert(false, u8"Ω"); // expected-error {{static_assert failed u8"\316\251"}} +static_assert(false, L"\u1234"); // expected-error {{static_assert failed L"\x1234"}} +static_assert(false, L"\x1ff" "0\x123" "fx\xfffff" "goop"); // expected-error {{static_assert failed L"\x1FF""0\x123""fx\xFFFFFgoop"}} |