aboutsummaryrefslogtreecommitdiff
path: root/lib/AST/Expr.cpp
diff options
context:
space:
mode:
authorRichard Trieu <rtrieu@google.com>2012-06-13 20:25:24 +0000
committerRichard Trieu <rtrieu@google.com>2012-06-13 20:25:24 +0000
commit8ab09da1faaa33b9fa78de59cc4e191bfe9907b5 (patch)
treec160032f5cb75e84f24c67d5109d82b38f4e7f55 /lib/AST/Expr.cpp
parentdc9327a8da640ecf7b15c5d5b726cd67a0e63284 (diff)
Moved the StringLiteral printing code from StmtPrinter into the StringLiteral
class and have StmtPrinter and StmtDumper refer to it. This fixes an assertion failure when dumping wchar string literals. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@158417 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/AST/Expr.cpp')
-rw-r--r--lib/AST/Expr.cpp93
1 files changed, 93 insertions, 0 deletions
diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp
index d3f6a521f1..5bbf7503f8 100644
--- a/lib/AST/Expr.cpp
+++ b/lib/AST/Expr.cpp
@@ -633,6 +633,99 @@ StringLiteral *StringLiteral::CreateEmpty(ASTContext &C, unsigned NumStrs) {
return SL;
}
+void StringLiteral::outputString(raw_ostream &OS) {
+ switch (getKind()) {
+ case Ascii: break; // no prefix.
+ case Wide: OS << 'L'; break;
+ case UTF8: OS << "u8"; break;
+ case UTF16: OS << 'u'; break;
+ case UTF32: OS << 'U'; break;
+ }
+ OS << '"';
+ static const char Hex[] = "0123456789ABCDEF";
+
+ unsigned LastSlashX = getLength();
+ for (unsigned I = 0, N = getLength(); I != N; ++I) {
+ switch (uint32_t Char = getCodeUnit(I)) {
+ default:
+ // FIXME: Convert UTF-8 back to codepoints before rendering.
+
+ // Convert UTF-16 surrogate pairs back to codepoints before rendering.
+ // Leave invalid surrogates alone; we'll use \x for those.
+ if (getKind() == UTF16 && I != N - 1 && Char >= 0xd800 &&
+ Char <= 0xdbff) {
+ uint32_t Trail = getCodeUnit(I + 1);
+ if (Trail >= 0xdc00 && Trail <= 0xdfff) {
+ Char = 0x10000 + ((Char - 0xd800) << 10) + (Trail - 0xdc00);
+ ++I;
+ }
+ }
+
+ if (Char > 0xff) {
+ // If this is a wide string, output characters over 0xff using \x
+ // escapes. Otherwise, this is a UTF-16 or UTF-32 string, and Char is a
+ // codepoint: use \x escapes for invalid codepoints.
+ if (getKind() == Wide ||
+ (Char >= 0xd800 && Char <= 0xdfff) || Char >= 0x110000) {
+ // FIXME: Is this the best way to print wchar_t?
+ OS << "\\x";
+ int Shift = 28;
+ while ((Char >> Shift) == 0)
+ Shift -= 4;
+ for (/**/; Shift >= 0; Shift -= 4)
+ OS << Hex[(Char >> Shift) & 15];
+ LastSlashX = I;
+ break;
+ }
+
+ if (Char > 0xffff)
+ OS << "\\U00"
+ << Hex[(Char >> 20) & 15]
+ << Hex[(Char >> 16) & 15];
+ else
+ OS << "\\u";
+ OS << Hex[(Char >> 12) & 15]
+ << Hex[(Char >> 8) & 15]
+ << Hex[(Char >> 4) & 15]
+ << Hex[(Char >> 0) & 15];
+ break;
+ }
+
+ // If we used \x... for the previous character, and this character is a
+ // hexadecimal digit, prevent it being slurped as part of the \x.
+ if (LastSlashX + 1 == I) {
+ switch (Char) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ OS << "\"\"";
+ }
+ }
+
+ assert(Char <= 0xff &&
+ "Characters above 0xff should already have been handled.");
+
+ if (isprint(Char))
+ OS << (char)Char;
+ else // Output anything hard as an octal escape.
+ OS << '\\'
+ << (char)('0' + ((Char >> 6) & 7))
+ << (char)('0' + ((Char >> 3) & 7))
+ << (char)('0' + ((Char >> 0) & 7));
+ break;
+ // Handle some common non-printable cases to make dumps prettier.
+ case '\\': OS << "\\\\"; break;
+ case '"': OS << "\\\""; break;
+ case '\n': OS << "\\n"; break;
+ case '\t': OS << "\\t"; break;
+ case '\a': OS << "\\a"; break;
+ case '\b': OS << "\\b"; break;
+ }
+ }
+ OS << '"';
+}
+
void StringLiteral::setString(ASTContext &C, StringRef Str,
StringKind Kind, bool IsPascal) {
//FIXME: we assume that the string data comes from a target that uses the same