about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Richard Trieu <rtrieu@google.com> 2012-06-13 20:25:24 +0000
committer: Richard Trieu <rtrieu@google.com> 2012-06-13 20:25:24 +0000
commit: 8ab09da1faaa33b9fa78de59cc4e191bfe9907b5 (patch)
tree: c160032f5cb75e84f24c67d5109d82b38f4e7f55
parent: dc9327a8da640ecf7b15c5d5b726cd67a0e63284 (diff)
Moved the StringLiteral printing code from StmtPrinter into the StringLiteral
class, and made StmtPrinter and StmtDumper call it. This fixes an assertion
failure when dumping wide (wchar_t) string literals.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@158417 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- include/clang/AST/Expr.h 2
-rw-r--r-- lib/AST/Expr.cpp 93
-rw-r--r-- lib/AST/StmtDumper.cpp 12
-rw-r--r-- lib/AST/StmtPrinter.cpp 88
-rw-r--r-- test/Misc/ast-dump-wchar.cpp 13
5 files changed, 110 insertions, 98 deletions
diff --git a/include/clang/AST/Expr.h b/include/clang/AST/Expr.h
index 40da0705f3..371263296f 100644
--- a/include/clang/AST/Expr.h
+++ b/include/clang/AST/Expr.h
@@ -1399,6 +1399,8 @@ public:
getByteLength());
}
+ void outputString(raw_ostream &OS);
+
uint32_t getCodeUnit(size_t i) const {
assert(i < Length && "out of bounds access");
if (CharByteWidth == 1)
diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp
index d3f6a521f1..5bbf7503f8 100644
--- a/lib/AST/Expr.cpp
+++ b/lib/AST/Expr.cpp
@@ -633,6 +633,99 @@ StringLiteral *StringLiteral::CreateEmpty(ASTContext &C, unsigned NumStrs) {
return SL;
}
+void StringLiteral::outputString(raw_ostream &OS) {
+ switch (getKind()) {
+ case Ascii: break; // no prefix.
+ case Wide: OS << 'L'; break;
+ case UTF8: OS << "u8"; break;
+ case UTF16: OS << 'u'; break;
+ case UTF32: OS << 'U'; break;
+ }
+ OS << '"';
+ static const char Hex[] = "0123456789ABCDEF";
+
+ unsigned LastSlashX = getLength();
+ for (unsigned I = 0, N = getLength(); I != N; ++I) {
+ switch (uint32_t Char = getCodeUnit(I)) {
+ default:
+ // FIXME: Convert UTF-8 back to codepoints before rendering.
+
+ // Convert UTF-16 surrogate pairs back to codepoints before rendering.
+ // Leave invalid surrogates alone; we'll use \x for those.
+ if (getKind() == UTF16 && I != N - 1 && Char >= 0xd800 &&
+ Char <= 0xdbff) {
+ uint32_t Trail = getCodeUnit(I + 1);
+ if (Trail >= 0xdc00 && Trail <= 0xdfff) {
+ Char = 0x10000 + ((Char - 0xd800) << 10) + (Trail - 0xdc00);
+ ++I;
+ }
+ }
+
+ if (Char > 0xff) {
+ // If this is a wide string, output characters over 0xff using \x
+ // escapes. Otherwise, this is a UTF-16 or UTF-32 string, and Char is a
+ // codepoint: use \x escapes for invalid codepoints.
+ if (getKind() == Wide ||
+ (Char >= 0xd800 && Char <= 0xdfff) || Char >= 0x110000) {
+ // FIXME: Is this the best way to print wchar_t?
+ OS << "\\x";
+ int Shift = 28;
+ while ((Char >> Shift) == 0)
+ Shift -= 4;
+ for (/**/; Shift >= 0; Shift -= 4)
+ OS << Hex[(Char >> Shift) & 15];
+ LastSlashX = I;
+ break;
+ }
+
+ if (Char > 0xffff)
+ OS << "\\U00"
+ << Hex[(Char >> 20) & 15]
+ << Hex[(Char >> 16) & 15];
+ else
+ OS << "\\u";
+ OS << Hex[(Char >> 12) & 15]
+ << Hex[(Char >> 8) & 15]
+ << Hex[(Char >> 4) & 15]
+ << Hex[(Char >> 0) & 15];
+ break;
+ }
+
+ // If we used \x... for the previous character, and this character is a
+ // hexadecimal digit, prevent it being slurped as part of the \x.
+ if (LastSlashX + 1 == I) {
+ switch (Char) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ OS << "\"\"";
+ }
+ }
+
+ assert(Char <= 0xff &&
+ "Characters above 0xff should already have been handled.");
+
+ if (isprint(Char))
+ OS << (char)Char;
+ else // Output anything hard as an octal escape.
+ OS << '\\'
+ << (char)('0' + ((Char >> 6) & 7))
+ << (char)('0' + ((Char >> 3) & 7))
+ << (char)('0' + ((Char >> 0) & 7));
+ break;
+ // Handle some common non-printable cases to make dumps prettier.
+ case '\\': OS << "\\\\"; break;
+ case '"': OS << "\\\""; break;
+ case '\n': OS << "\\n"; break;
+ case '\t': OS << "\\t"; break;
+ case '\a': OS << "\\a"; break;
+ case '\b': OS << "\\b"; break;
+ }
+ }
+ OS << '"';
+}
+
void StringLiteral::setString(ASTContext &C, StringRef Str,
StringKind Kind, bool IsPascal) {
//FIXME: we assume that the string data comes from a target that uses the same
diff --git a/lib/AST/StmtDumper.cpp b/lib/AST/StmtDumper.cpp
index df0052760b..a57cce8371 100644
--- a/lib/AST/StmtDumper.cpp
+++ b/lib/AST/StmtDumper.cpp
@@ -446,18 +446,8 @@ void StmtDumper::VisitFloatingLiteral(FloatingLiteral *Node) {
void StmtDumper::VisitStringLiteral(StringLiteral *Str) {
DumpExpr(Str);
- // FIXME: this doesn't print wstrings right.
OS << " ";
- switch (Str->getKind()) {
- case StringLiteral::Ascii: break; // No prefix
- case StringLiteral::Wide: OS << 'L'; break;
- case StringLiteral::UTF8: OS << "u8"; break;
- case StringLiteral::UTF16: OS << 'u'; break;
- case StringLiteral::UTF32: OS << 'U'; break;
- }
- OS << '"';
- OS.write_escaped(Str->getString());
- OS << '"';
+ Str->outputString(OS);
}
void StmtDumper::VisitUnaryOperator(UnaryOperator *Node) {
diff --git a/lib/AST/StmtPrinter.cpp b/lib/AST/StmtPrinter.cpp
index 30548aea60..cb757cdde1 100644
--- a/lib/AST/StmtPrinter.cpp
+++ b/lib/AST/StmtPrinter.cpp
@@ -739,93 +739,7 @@ void StmtPrinter::VisitImaginaryLiteral(ImaginaryLiteral *Node) {
}
void StmtPrinter::VisitStringLiteral(StringLiteral *Str) {
- switch (Str->getKind()) {
- case StringLiteral::Ascii: break; // no prefix.
- case StringLiteral::Wide: OS << 'L'; break;
- case StringLiteral::UTF8: OS << "u8"; break;
- case StringLiteral::UTF16: OS << 'u'; break;
- case StringLiteral::UTF32: OS << 'U'; break;
- }
- OS << '"';
- static const char Hex[] = "0123456789ABCDEF";
-
- unsigned LastSlashX = Str->getLength();
- for (unsigned I = 0, N = Str->getLength(); I != N; ++I) {
- switch (uint32_t Char = Str->getCodeUnit(I)) {
- default:
- // FIXME: Convert UTF-8 back to codepoints before rendering.
-
- // Convert UTF-16 surrogate pairs back to codepoints before rendering.
- // Leave invalid surrogates alone; we'll use \x for those.
- if (Str->getKind() == StringLiteral::UTF16 && I != N - 1 &&
- Char >= 0xd800 && Char <= 0xdbff) {
- uint32_t Trail = Str->getCodeUnit(I + 1);
- if (Trail >= 0xdc00 && Trail <= 0xdfff) {
- Char = 0x10000 + ((Char - 0xd800) << 10) + (Trail - 0xdc00);
- ++I;
- }
- }
-
- if (Char > 0xff) {
- // If this is a wide string, output characters over 0xff using \x
- // escapes. Otherwise, this is a UTF-16 or UTF-32 string, and Char is a
- // codepoint: use \x escapes for invalid codepoints.
- if (Str->getKind() == StringLiteral::Wide ||
- (Char >= 0xd800 && Char <= 0xdfff) || Char >= 0x110000) {
- // FIXME: Is this the best way to print wchar_t?
- OS << "\\x";
- int Shift = 28;
- while ((Char >> Shift) == 0)
- Shift -= 4;
- for (/**/; Shift >= 0; Shift -= 4)
- OS << Hex[(Char >> Shift) & 15];
- LastSlashX = I;
- break;
- }
-
- if (Char > 0xffff)
- OS << "\\U00"
- << Hex[(Char >> 20) & 15]
- << Hex[(Char >> 16) & 15];
- else
- OS << "\\u";
- OS << Hex[(Char >> 12) & 15]
- << Hex[(Char >> 8) & 15]
- << Hex[(Char >> 4) & 15]
- << Hex[(Char >> 0) & 15];
- break;
- }
-
- // If we used \x... for the previous character, and this character is a
- // hexadecimal digit, prevent it being slurped as part of the \x.
- if (LastSlashX + 1 == I) {
- switch (Char) {
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- OS << "\"\"";
- }
- }
-
- if (Char <= 0xff && isprint(Char))
- OS << (char)Char;
- else // Output anything hard as an octal escape.
- OS << '\\'
- << (char)('0' + ((Char >> 6) & 7))
- << (char)('0' + ((Char >> 3) & 7))
- << (char)('0' + ((Char >> 0) & 7));
- break;
- // Handle some common non-printable cases to make dumps prettier.
- case '\\': OS << "\\\\"; break;
- case '"': OS << "\\\""; break;
- case '\n': OS << "\\n"; break;
- case '\t': OS << "\\t"; break;
- case '\a': OS << "\\a"; break;
- case '\b': OS << "\\b"; break;
- }
- }
- OS << '"';
+ Str->outputString(OS);
}
void StmtPrinter::VisitParenExpr(ParenExpr *Node) {
OS << "(";
diff --git a/test/Misc/ast-dump-wchar.cpp b/test/Misc/ast-dump-wchar.cpp
new file mode 100644
index 0000000000..4153706bd6
--- /dev/null
+++ b/test/Misc/ast-dump-wchar.cpp
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -std=c++11 -ast-dump %s 2>&1 | FileCheck %s
+
+char c8[] = u8"test\0\\\"\t\a\b\234";
+// CHECK: char c8[12] = (StringLiteral {{.*}} lvalue u8"test\000\\\"\t\a\b\234")
+
+char16_t c16[] = u"test\0\\\"\t\a\b\234\u1234";
+// CHECK: char16_t c16[13] = (StringLiteral {{.*}} lvalue u"test\000\\\"\t\a\b\234\u1234")
+
+char32_t c32[] = U"test\0\\\"\t\a\b\234\u1234\U0010ffff"; // \
+// CHECK: char32_t c32[14] = (StringLiteral {{.*}} lvalue U"test\000\\\"\t\a\b\234\u1234\U0010FFFF")
+
+wchar_t wc[] = L"test\0\\\"\t\a\b\234\u1234\xffffffff"; // \
+// CHECK: wchar_t wc[14] = (StringLiteral {{.*}} lvalue L"test\000\\\"\t\a\b\234\x1234\xFFFFFFFF")