diff options
author | Eli Friedman <eli.friedman@gmail.com> | 2011-11-01 02:23:42 +0000 |
---|---|---|
committer | Eli Friedman <eli.friedman@gmail.com> | 2011-11-01 02:23:42 +0000 |
commit | 64f45a24b19eb89ff88f7c3ff0df9be8e861ac97 (patch) | |
tree | de9dd9c4244910961e8d67b69a0a83be4306154f | |
parent | f74a4587629615ffd13bd0724868f86ba8c8f27b (diff) |
Fix the representation of wide strings in the AST and IR so that it uses the native representation of integers for the elements. This fixes a bunch of nastiness involving
treating wide strings as a series of bytes.
Patch by Seth Cantrell.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@143417 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/clang/AST/Expr.h | 55 | ||||
-rw-r--r-- | include/clang/Lex/LiteralSupport.h | 11 | ||||
-rw-r--r-- | lib/AST/Expr.cpp | 75 | ||||
-rw-r--r-- | lib/CodeGen/CGExprConstant.cpp | 8 | ||||
-rw-r--r-- | lib/CodeGen/CodeGenModule.cpp | 75 | ||||
-rw-r--r-- | lib/CodeGen/CodeGenModule.h | 4 | ||||
-rw-r--r-- | lib/Sema/SemaExpr.cpp | 2 | ||||
-rw-r--r-- | lib/Serialization/ASTReaderStmt.cpp | 7 | ||||
-rw-r--r-- | lib/Serialization/ASTWriterStmt.cpp | 2 | ||||
-rw-r--r-- | test/CodeGen/global-init.c | 2 | ||||
-rw-r--r-- | test/CodeGen/pascal-wchar-string.c | 4 | ||||
-rw-r--r-- | test/CodeGen/string-literal-short-wstring.c | 4 | ||||
-rw-r--r-- | test/CodeGen/string-literal.c | 30 | ||||
-rw-r--r-- | test/CodeGen/wchar-const.c | 4 | ||||
-rw-r--r-- | test/CodeGenCXX/uncode-string.cpp | 2 |
15 files changed, 203 insertions, 82 deletions
diff --git a/include/clang/AST/Expr.h b/include/clang/AST/Expr.h index ca884b955a..fd88e1e88e 100644 --- a/include/clang/AST/Expr.h +++ b/include/clang/AST/Expr.h @@ -1271,8 +1271,13 @@ public: private: friend class ASTStmtReader; - const char *StrData; - unsigned ByteLength; + union { + const char *asChar; + const uint16_t *asUInt16; + const uint32_t *asUInt32; + } StrData; + unsigned Length; + unsigned CharByteWidth; unsigned NumConcatenated; unsigned Kind : 3; bool IsPascal : 1; @@ -1282,6 +1287,8 @@ private: Expr(StringLiteralClass, Ty, VK_LValue, OK_Ordinary, false, false, false, false) {} + static int mapCharByteWidth(TargetInfo const &target,StringKind k); + public: /// This is the "fully general" constructor that allows representation of /// strings formed from multiple concatenated tokens. @@ -1300,15 +1307,52 @@ public: static StringLiteral *CreateEmpty(ASTContext &C, unsigned NumStrs); StringRef getString() const { - return StringRef(StrData, ByteLength); + assert(CharByteWidth==1 + && "This function is used in places that assume strings use char"); + return StringRef(StrData.asChar, getByteLength()); + } + + /// Allow clients that need the byte representation, such as ASTWriterStmt + /// ::VisitStringLiteral(), access. + StringRef getBytes() const { + // FIXME: StringRef may not be the right type to use as a result for this... + assert((CharByteWidth==1 || CharByteWidth==2 || CharByteWidth==4) + && "unsupported CharByteWidth"); + if (CharByteWidth==4) { + return StringRef(reinterpret_cast<const char*>(StrData.asUInt32), + getByteLength()); + } else if (CharByteWidth==2) { + return StringRef(reinterpret_cast<const char*>(StrData.asUInt16), + getByteLength()); + } else { + return StringRef(StrData.asChar, getByteLength()); + } } - unsigned getByteLength() const { return ByteLength; } + uint32_t getCodeUnit(size_t i) const { + assert(i<Length && "out of bounds access"); + assert((CharByteWidth==1 || CharByteWidth==2 || CharByteWidth==4) + && "unsupported CharByteWidth"); + if (CharByteWidth==4) { + return StrData.asUInt32[i]; + } else if (CharByteWidth==2) { + return StrData.asUInt16[i]; + } else { + return static_cast<unsigned char>(StrData.asChar[i]); + } + } + + unsigned getByteLength() const { return CharByteWidth*Length; } + unsigned getLength() const { return Length; } + unsigned getCharByteWidth() const { return CharByteWidth; } /// \brief Sets the string data to the given string data. - void setString(ASTContext &C, StringRef Str); + void setString(ASTContext &C, StringRef Str, + StringKind Kind, bool IsPascal); StringKind getKind() const { return static_cast<StringKind>(Kind); } + + bool isAscii() const { return Kind == Ascii; } bool isWide() const { return Kind == Wide; } bool isUTF8() const { return Kind == UTF8; } @@ -1323,6 +1367,7 @@ public: return true; return false; } + /// getNumConcatenated - Get the number of string literal tokens that were /// concatenated in translation phase #6 to form this string literal. unsigned getNumConcatenated() const { return NumConcatenated; } diff --git a/include/clang/Lex/LiteralSupport.h b/include/clang/Lex/LiteralSupport.h index 6f378041ae..635d3cb67e 100644 --- a/include/clang/Lex/LiteralSupport.h +++ b/include/clang/Lex/LiteralSupport.h @@ -189,11 +189,12 @@ public: /// checking of the string literal and emit errors and warnings. unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const; - bool isAscii() { return Kind == tok::string_literal; } - bool isWide() { return Kind == tok::wide_string_literal; } - bool isUTF8() { return Kind == tok::utf8_string_literal; } - bool isUTF16() { return Kind == tok::utf16_string_literal; } - bool isUTF32() { return Kind == tok::utf32_string_literal; } + bool isAscii() const { return Kind == tok::string_literal; } + bool isWide() const { return Kind == tok::wide_string_literal; } + bool isUTF8() const { return Kind == tok::utf8_string_literal; } + bool isUTF16() const { return Kind == tok::utf16_string_literal; } + bool isUTF32() const { return Kind == tok::utf32_string_literal; } + bool isPascal() const { return Pascal; } private: void init(const Token *StringToks, unsigned NumStringToks); diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp index 96a8125e18..3239973688 100644 --- a/lib/AST/Expr.cpp +++ b/lib/AST/Expr.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> +#include <cstring> using namespace clang; /// isKnownToHaveBooleanValue - Return true if this is an integer expression @@ -482,6 +483,29 @@ double FloatingLiteral::getValueAsApproximateDouble() const { return V.convertToDouble(); } +int StringLiteral::mapCharByteWidth(TargetInfo const &target,StringKind k) { + int CharByteWidth; + switch(k) { + case Ascii: + case UTF8: + CharByteWidth = target.getCharWidth(); + break; + case Wide: + CharByteWidth = target.getWCharWidth(); + break; + case UTF16: + CharByteWidth = target.getChar16Width(); + break; + case UTF32: + CharByteWidth = target.getChar32Width(); + } + assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple"); + CharByteWidth /= 8; + assert((CharByteWidth==1 || CharByteWidth==2 || CharByteWidth==4) + && "character byte widths supported are 1, 2, and 4 only"); + return CharByteWidth; +} + StringLiteral *StringLiteral::Create(ASTContext &C, StringRef Str, StringKind Kind, bool Pascal, QualType Ty, const SourceLocation *Loc, @@ -494,12 +518,8 @@ StringLiteral *StringLiteral::Create(ASTContext &C, StringRef Str, StringLiteral *SL = new (Mem) StringLiteral(Ty); // OPTIMIZE: could allocate this appended to the StringLiteral. - char *AStrData = new (C, 1) char[Str.size()]; - memcpy(AStrData, Str.data(), Str.size()); - SL->StrData = AStrData; - SL->ByteLength = Str.size(); - SL->Kind = Kind; - SL->IsPascal = Pascal; + SL->setString(C,Str,Kind,Pascal); + SL->TokLocs[0] = Loc[0]; SL->NumConcatenated = NumStrs; @@ -513,17 +533,46 @@ StringLiteral *StringLiteral::CreateEmpty(ASTContext &C, unsigned NumStrs) { sizeof(SourceLocation)*(NumStrs-1), llvm::alignOf<StringLiteral>()); StringLiteral *SL = new (Mem) StringLiteral(QualType()); - SL->StrData = 0; - SL->ByteLength = 0; + SL->CharByteWidth = 0; + SL->Length = 0; SL->NumConcatenated = NumStrs; return SL; } -void StringLiteral::setString(ASTContext &C, StringRef Str) { - char *AStrData = new (C, 1) char[Str.size()]; - memcpy(AStrData, Str.data(), Str.size()); - StrData = AStrData; - ByteLength = Str.size(); +void StringLiteral::setString(ASTContext &C, StringRef Str, + StringKind Kind, bool IsPascal) { + //FIXME: we assume that the string data comes from a target that uses the same + // code unit size and endianess for the type of string. + this->Kind = Kind; + this->IsPascal = IsPascal; + + CharByteWidth = mapCharByteWidth(C.getTargetInfo(),Kind); + assert((Str.size()%CharByteWidth == 0) + && "size of data must be multiple of CharByteWidth"); + Length = Str.size()/CharByteWidth; + + switch(CharByteWidth) { + case 1: { + char *AStrData = new (C) char[Length]; + std::memcpy(AStrData,Str.data(),Str.size()); + StrData.asChar = AStrData; + break; + } + case 2: { + uint16_t *AStrData = new (C) uint16_t[Length]; + std::memcpy(AStrData,Str.data(),Str.size()); + StrData.asUInt16 = AStrData; + break; + } + case 4: { + uint32_t *AStrData = new (C) uint32_t[Length]; + std::memcpy(AStrData,Str.data(),Str.size()); + StrData.asUInt32 = AStrData; + break; + } + default: + assert(false && "unsupported CharByteWidth"); + } } /// getLocationOfByte - Return a source location that points to the specified diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp index 0622c10165..889cdd8f09 100644 --- a/lib/CodeGen/CGExprConstant.cpp +++ b/lib/CodeGen/CGExprConstant.cpp @@ -817,13 +817,7 @@ public: } llvm::Constant *VisitStringLiteral(StringLiteral *E) { - assert(!E->getType()->isPointerType() && "Strings are always arrays"); - - // This must be a string initializing an array in a static initializer. - // Don't emit it as the address of the string, emit the string data itself - // as an inline array. - return llvm::ConstantArray::get(VMContext, - CGM.GetStringForStringLiteral(E), false); + return CGM.GetConstantArrayFromStringLiteral(E); } llvm::Constant *VisitObjCEncodeExpr(ObjCEncodeExpr *E) { diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index c796e0daa9..0905c4b283 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -2037,6 +2037,8 @@ QualType CodeGenModule::getObjCFastEnumerationStateType() { /// GetStringForStringLiteral - Return the appropriate bytes for a /// string literal, properly padded to match the literal type. std::string CodeGenModule::GetStringForStringLiteral(const StringLiteral *E) { + assert((E->isAscii() || E->isUTF8()) + && "Use GetConstantArrayFromStringLiteral for wide strings"); const ASTContext &Context = getContext(); const ConstantArrayType *CAT = Context.getAsConstantArrayType(E->getType()); @@ -2045,27 +2047,44 @@ std::string CodeGenModule::GetStringForStringLiteral(const StringLiteral *E) { // Resize the string to the right size. uint64_t RealLen = CAT->getSize().getZExtValue(); - switch (E->getKind()) { - case StringLiteral::Ascii: - case StringLiteral::UTF8: - break; - case StringLiteral::Wide: - RealLen *= Context.getTargetInfo().getWCharWidth() / Context.getCharWidth(); - break; - case StringLiteral::UTF16: - RealLen *= Context.getTargetInfo().getChar16Width() / Context.getCharWidth(); - break; - case StringLiteral::UTF32: - RealLen *= Context.getTargetInfo().getChar32Width() / Context.getCharWidth(); - break; - } - std::string Str = E->getString().str(); Str.resize(RealLen, '\0'); return Str; } +llvm::Constant * +CodeGenModule::GetConstantArrayFromStringLiteral(const StringLiteral *E) { + assert(!E->getType()->isPointerType() && "Strings are always arrays"); + + // Don't emit it as the address of the string, emit the string data itself + // as an inline array. + if (E->getCharByteWidth()==1) { + return llvm::ConstantArray::get(VMContext, + GetStringForStringLiteral(E), false); + } else { + llvm::ArrayType *AType = + cast<llvm::ArrayType>(getTypes().ConvertType(E->getType())); + llvm::Type *ElemTy = AType->getElementType(); + unsigned NumElements = AType->getNumElements(); + std::vector<llvm::Constant*> Elts; + Elts.reserve(NumElements); + + for(unsigned i=0;i<E->getLength();++i) { + unsigned value = E->getCodeUnit(i); + llvm::Constant *C = llvm::ConstantInt::get(ElemTy,value,false); + Elts.push_back(C); + } + for(unsigned i=E->getLength();i<NumElements;++i) { + llvm::Constant *C = llvm::ConstantInt::get(ElemTy,0,false); + Elts.push_back(C); + } + + return llvm::ConstantArray::get(AType, Elts); + } + +} + /// GetAddrOfConstantStringFromLiteral - Return a pointer to a /// constant array for the given string literal. llvm::Constant * @@ -2073,15 +2092,23 @@ CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S) { // FIXME: This can be more efficient. // FIXME: We shouldn't need to bitcast the constant in the wide string case. CharUnits Align = getContext().getTypeAlignInChars(S->getType()); - llvm::Constant *C = GetAddrOfConstantString(GetStringForStringLiteral(S), - /* GlobalName */ 0, - Align.getQuantity()); - if (S->isWide() || S->isUTF16() || S->isUTF32()) { - llvm::Type *DestTy = - llvm::PointerType::getUnqual(getTypes().ConvertType(S->getType())); - C = llvm::ConstantExpr::getBitCast(C, DestTy); - } - return C; + if (S->isAscii() || S->isUTF8()) { + return GetAddrOfConstantString(GetStringForStringLiteral(S), + /* GlobalName */ 0, + Align.getQuantity()); + } + + // FIXME: the following does not memoize wide strings + llvm::Constant *C = GetConstantArrayFromStringLiteral(S); + llvm::GlobalVariable *GV = + new llvm::GlobalVariable(getModule(),C->getType(), + !Features.WritableStrings, + llvm::GlobalValue::PrivateLinkage, + C,".str"); + GV->setAlignment(Align.getQuantity()); + GV->setUnnamedAddr(true); + + return GV; } /// GetAddrOfConstantStringFromObjCEncode - Return a pointer to a constant diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h index ea2e177605..0ce698ae9f 100644 --- a/lib/CodeGen/CodeGenModule.h +++ b/lib/CodeGen/CodeGenModule.h @@ -565,6 +565,10 @@ public: /// -fconstant-string-class=class_name option. llvm::Constant *GetAddrOfConstantString(const StringLiteral *Literal); + /// GetConstantArrayFromStringLiteral - Return a constant array for the given + /// string. + llvm::Constant *GetConstantArrayFromStringLiteral(const StringLiteral *E); + /// GetAddrOfConstantStringFromLiteral - Return a pointer to a constant array /// for the given string literal. llvm::Constant *GetAddrOfConstantStringFromLiteral(const StringLiteral *S); diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp index 21d0309377..61766a88c0 100644 --- a/lib/Sema/SemaExpr.cpp +++ b/lib/Sema/SemaExpr.cpp @@ -1141,7 +1141,7 @@ Sema::ActOnStringLiteral(const Token *StringToks, unsigned NumStringToks) { StrTy = Context.Char16Ty; else if (Literal.isUTF32()) StrTy = Context.Char32Ty; - else if (Literal.Pascal) + else if (Literal.isPascal()) StrTy = Context.UnsignedCharTy; StringLiteral::StringKind Kind = StringLiteral::Ascii; diff --git a/lib/Serialization/ASTReaderStmt.cpp b/lib/Serialization/ASTReaderStmt.cpp index 87912af461..e57ab1937c 100644 --- a/lib/Serialization/ASTReaderStmt.cpp +++ b/lib/Serialization/ASTReaderStmt.cpp @@ -372,12 +372,13 @@ void ASTStmtReader::VisitStringLiteral(StringLiteral *E) { assert(Record[Idx] == E->getNumConcatenated() && "Wrong number of concatenated tokens!"); ++Idx; - E->Kind = static_cast<StringLiteral::StringKind>(Record[Idx++]); - E->IsPascal = Record[Idx++]; + StringLiteral::StringKind kind = + static_cast<StringLiteral::StringKind>(Record[Idx++]); + bool isPascal = Record[Idx++]; // Read string data llvm::SmallString<16> Str(&Record[Idx], &Record[Idx] + Len); - E->setString(Reader.getContext(), Str.str()); + E->setString(Reader.getContext(), Str.str(), kind, isPascal); Idx += Len; // Read source locations diff --git a/lib/Serialization/ASTWriterStmt.cpp b/lib/Serialization/ASTWriterStmt.cpp index 0721c299a1..61570a880a 100644 --- a/lib/Serialization/ASTWriterStmt.cpp +++ b/lib/Serialization/ASTWriterStmt.cpp @@ -331,7 +331,7 @@ void ASTStmtWriter::VisitStringLiteral(StringLiteral *E) { // StringLiteral. However, we can't do so now because we have no // provision for coping with abbreviations when we're jumping around // the AST file during deserialization. - Record.append(E->getString().begin(), E->getString().end()); + Record.append(E->getBytes().begin(), E->getBytes().end()); for (unsigned I = 0, N = E->getNumConcatenated(); I != N; ++I) Writer.AddSourceLocation(E->getStrTokenLoc(I), Record); Code = serialization::EXPR_STRING_LITERAL; diff --git a/test/CodeGen/global-init.c b/test/CodeGen/global-init.c index 074c2a065a..dab5a07d61 100644 --- a/test/CodeGen/global-init.c +++ b/test/CodeGen/global-init.c @@ -32,7 +32,7 @@ struct ManyFields FewInits = {1, 2}; // PR6766 -// CHECK: @l = global { [24 x i8], i32 } { [24 x i8] c"f\00\00\00o\00\00\00o\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", i32 1 } +// CHECK: @l = global %struct.K { [6 x i32] [i32 102, i32 111, i32 111, i32 0, i32 0, i32 0], i32 1 } typedef __WCHAR_TYPE__ wchar_t; struct K { wchar_t L[6]; diff --git a/test/CodeGen/pascal-wchar-string.c b/test/CodeGen/pascal-wchar-string.c index a6b619643e..626fc99f15 100644 --- a/test/CodeGen/pascal-wchar-string.c +++ b/test/CodeGen/pascal-wchar-string.c @@ -29,8 +29,8 @@ int main(int argc, char* argv[]) return 0; } -// CHECK: c"\03\00b\00a\00r\00\00\00" -// CHECK: c"\04\00g\00o\00r\00f\00\00\00" +// CHECK: [i16 3, i16 98, i16 97, i16 114, i16 0] +// CHECK: [i16 4, i16 103, i16 111, i16 114, i16 102, i16 0] // PR8856 - -fshort-wchar makes wchar_t be unsigned. diff --git a/test/CodeGen/string-literal-short-wstring.c b/test/CodeGen/string-literal-short-wstring.c index 770c3d4268..309ffd33b2 100644 --- a/test/CodeGen/string-literal-short-wstring.c +++ b/test/CodeGen/string-literal-short-wstring.c @@ -6,11 +6,11 @@ int main() { // CHECK: private unnamed_addr constant [10 x i8] c"\E1\84\A0\C8\A0\F4\82\80\B0\00", align 1 char b[10] = "\u1120\u0220\U00102030"; - // CHECK: private unnamed_addr constant [6 x i8] c"A\00B\00\00\00" + // CHECK: private unnamed_addr constant [3 x i16] [i16 65, i16 66, i16 0] const wchar_t *foo = L"AB"; // This should convert to utf16. - // CHECK: private unnamed_addr constant [10 x i8] c" \11 \02\C8\DB0\DC\00\00" + // CHECK: private unnamed_addr constant [5 x i16] [i16 4384, i16 544, i16 -9272, i16 -9168, i16 0] const wchar_t *bar = L"\u1120\u0220\U00102030"; diff --git a/test/CodeGen/string-literal.c b/test/CodeGen/string-literal.c index fa8f28a766..12d431a454 100644 --- a/test/CodeGen/string-literal.c +++ b/test/CodeGen/string-literal.c @@ -14,37 +14,37 @@ int main() { // CHECK-CPP0X: private unnamed_addr constant [10 x i8] c"\E1\84\A0\C8\A0\F4\82\80\B0\00", align 1 char b[10] = "\u1120\u0220\U00102030"; - // CHECK-C: private unnamed_addr constant [12 x i8] c"A\00\00\00B\00\00\00\00\00\00\00", align 4 - // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"A\00\00\00B\00\00\00\00\00\00\00", align 4 + // CHECK-C: private unnamed_addr constant [3 x i32] [i32 65, i32 66, i32 0], align 4 + // CHECK-CPP0X: private unnamed_addr constant [3 x i32] [i32 65, i32 66, i32 0], align 4 const wchar_t *foo = L"AB"; - // CHECK-C: private unnamed_addr constant [12 x i8] c"4\12\00\00\0B\F0\10\00\00\00\00\00", align 4 - // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"4\12\00\00\0B\F0\10\00\00\00\00\00", align 4 + // CHECK-C: private unnamed_addr constant [3 x i32] [i32 4660, i32 1110027, i32 0], align 4 + // CHECK-CPP0X: private unnamed_addr constant [3 x i32] [i32 4660, i32 1110027, i32 0], align 4 const wchar_t *bar = L"\u1234\U0010F00B"; - // CHECK-C: private unnamed_addr constant [12 x i8] c"4\12\00\00\0C\F0\10\00\00\00\00\00", align 4 - // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"4\12\00\00\0C\F0\10\00\00\00\00\00", align 4 + // CHECK-C: private unnamed_addr constant [3 x i32] [i32 4660, i32 1110028, i32 0], align 4 + // CHECK-CPP0X: private unnamed_addr constant [3 x i32] [i32 4660, i32 1110028, i32 0], align 4 const wchar_t *baz = L"\u1234" "\U0010F00C"; #if __cplusplus >= 201103L - // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"C\00\00\00D\00\00\00\00\00\00\00", align 4 + // CHECK-CPP0X: private unnamed_addr constant [3 x i32] [i32 67, i32 68, i32 0], align 4 const char32_t *c = U"CD"; - // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"5\12\00\00\0C\F0\10\00\00\00\00\00", align 4 + // CHECK-CPP0X: private unnamed_addr constant [3 x i32] [i32 4661, i32 1110028, i32 0], align 4 const char32_t *d = U"\u1235\U0010F00C"; - // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"5\12\00\00\0B\F0\10\00\00\00\00\00", align 4 + // CHECK-CPP0X: private unnamed_addr constant [3 x i32] [i32 4661, i32 1110027, i32 0], align 4 const char32_t *o = "\u1235" U"\U0010F00B"; - // CHECK-CPP0X: private unnamed_addr constant [6 x i8] c"E\00F\00\00\00", align 2 + // CHECK-CPP0X: private unnamed_addr constant [3 x i16] [i16 69, i16 70, i16 0], align 2 const char16_t *e = u"EF"; // This should convert to utf16. - // CHECK-CPP0X: private unnamed_addr constant [10 x i8] c" \11 \02\C8\DB0\DC\00\00", align 2 + // CHECK-CPP0X: private unnamed_addr constant [5 x i16] [i16 4384, i16 544, i16 -9272, i16 -9168, i16 0], align 2 const char16_t *f = u"\u1120\u0220\U00102030"; // This should convert to utf16. - // CHECK-CPP0X: private unnamed_addr constant [10 x i8] c" \11 \03\C8\DB0\DC\00\00", align 2 + // CHECK-CPP0X: private unnamed_addr constant [5 x i16] [i16 4384, i16 800, i16 -9272, i16 -9168, i16 0], align 2 const char16_t *p = u"\u1120\u0320" "\U00102030"; // CHECK-CPP0X: private unnamed_addr constant [4 x i8] c"def\00", align 1 @@ -56,13 +56,13 @@ int main() { // CHECK-CPP0X: private unnamed_addr constant [4 x i8] c"jkl\00", align 1 const char *i = u8R"bar(jkl)bar"; - // CHECK-CPP0X: private unnamed_addr constant [6 x i8] c"G\00H\00\00\00", align 2 + // CHECK-CPP0X: private unnamed_addr constant [3 x i16] [i16 71, i16 72, i16 0], align 2 const char16_t *j = uR"foo(GH)foo"; - // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"I\00\00\00J\00\00\00\00\00\00\00", align 4 + // CHECK-CPP0X: private unnamed_addr constant [3 x i32] [i32 73, i32 74, i32 0], align 4 const char32_t *k = UR"bar(IJ)bar"; - // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"K\00\00\00L\00\00\00\00\00\00\00", align 4 + // CHECK-CPP0X: private unnamed_addr constant [3 x i32] [i32 75, i32 76, i32 0], align 4 const wchar_t *l = LR"bar(KL)bar"; // CHECK-CPP0X: private unnamed_addr constant [9 x i8] c"abc\5Cndef\00", align 1 diff --git a/test/CodeGen/wchar-const.c b/test/CodeGen/wchar-const.c index b672b15360..a9e7e523f9 100644 --- a/test/CodeGen/wchar-const.c +++ b/test/CodeGen/wchar-const.c @@ -14,8 +14,8 @@ typedef __WCHAR_TYPE__ wchar_t; #endif -// CHECK-DAR: private unnamed_addr constant [72 x i8] c" -// CHECK-WIN: private unnamed_addr constant [36 x i8] c" +// CHECK-DAR: private unnamed_addr constant [18 x i32] [i32 84, +// CHECK-WIN: private unnamed_addr constant [18 x i16] [i16 84, extern void foo(const wchar_t* p); int main (int argc, const char * argv[]) { diff --git a/test/CodeGenCXX/uncode-string.cpp b/test/CodeGenCXX/uncode-string.cpp index e543149747..1d839992f9 100644 --- a/test/CodeGenCXX/uncode-string.cpp +++ b/test/CodeGenCXX/uncode-string.cpp @@ -3,4 +3,4 @@ wchar_t s[] = L"\u2722"; -// CHECK: @s = global [8 x i8] c"\22'\00\00\00\00\00\00" +// CHECK: @s = global [2 x i32] [i32 10018, i32 0], align 4 |