Fix the representation of wide strings in the AST and IR so that it uses the native representation of integers for the elements. This fixes a bunch of nastiness involving treating wide strings as a series of bytes.

Patch by Seth Cantrell.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@143417 91177308-0d34-0410-b5e6-96231b3b80d8
author: Eli Friedman <eli.friedman@gmail.com> 2011-11-01 02:23:42 +0000
committer: Eli Friedman <eli.friedman@gmail.com> 2011-11-01 02:23:42 +0000
commit: 64f45a24b19eb89ff88f7c3ff0df9be8e861ac97 (patch)
tree: de9dd9c4244910961e8d67b69a0a83be4306154f
parent: f74a4587629615ffd13bd0724868f86ba8c8f27b (diff)
15 files changed, 203 insertions, 82 deletions
diff --git a/include/clang/AST/Expr.h b/include/clang/AST/Expr.h
index ca884b955a..fd88e1e88e 100644
--- a/include/clang/AST/Expr.h
+++ b/include/clang/AST/Expr.h
@@ -1271,8 +1271,13 @@ public:
 private:
   friend class ASTStmtReader;
 
-  const char *StrData;
-  unsigned ByteLength;
+  union {
+    const char *asChar;
+    const uint16_t *asUInt16;
+    const uint32_t *asUInt32;
+  } StrData;
+  unsigned Length;
+  unsigned CharByteWidth;
   unsigned NumConcatenated;
   unsigned Kind : 3;
   bool IsPascal : 1;
@@ -1282,6 +1287,8 @@ private:
     Expr(StringLiteralClass, Ty, VK_LValue, OK_Ordinary, false, false, false,
          false) {}
 
+  static int mapCharByteWidth(TargetInfo const &target,StringKind k);
+
 public:
   /// This is the "fully general" constructor that allows representation of
   /// strings formed from multiple concatenated tokens.
@@ -1300,15 +1307,52 @@ public:
   static StringLiteral *CreateEmpty(ASTContext &C, unsigned NumStrs);
 
   StringRef getString() const {
-    return StringRef(StrData, ByteLength);
+    assert(CharByteWidth==1
+           && "This function is used in places that assume strings use char");
+    return StringRef(StrData.asChar, getByteLength());
+  }
+
+  /// Allow clients that need the byte representation, such as ASTWriterStmt
+  /// ::VisitStringLiteral(), access.
+  StringRef getBytes() const {
+    // FIXME: StringRef may not be the right type to use as a result for this...
+    assert((CharByteWidth==1 || CharByteWidth==2 || CharByteWidth==4)
+           && "unsupported CharByteWidth");
+    if (CharByteWidth==4) {
+      return StringRef(reinterpret_cast<const char*>(StrData.asUInt32),
+                       getByteLength());
+    } else if (CharByteWidth==2) {
+      return StringRef(reinterpret_cast<const char*>(StrData.asUInt16),
+                       getByteLength());
+    } else {
+      return StringRef(StrData.asChar, getByteLength());
+    }
   }
 
-  unsigned getByteLength() const { return ByteLength; }
+  uint32_t getCodeUnit(size_t i) const {
+    assert(i<Length && "out of bounds access");
+    assert((CharByteWidth==1 || CharByteWidth==2 || CharByteWidth==4)
+           && "unsupported CharByteWidth");
+    if (CharByteWidth==4) {
+      return StrData.asUInt32[i];
+    } else if (CharByteWidth==2) {
+      return StrData.asUInt16[i];
+    } else {
+      return static_cast<unsigned char>(StrData.asChar[i]);
+    }
+  }
+
+  unsigned getByteLength() const { return CharByteWidth*Length; }
+  unsigned getLength() const { return Length; }
+  unsigned getCharByteWidth() const { return CharByteWidth; }
 
   /// \brief Sets the string data to the given string data.
-  void setString(ASTContext &C, StringRef Str);
+  void setString(ASTContext &C, StringRef Str,
+                 StringKind Kind, bool IsPascal);
 
   StringKind getKind() const { return static_cast<StringKind>(Kind); }
+  
+  
   bool isAscii() const { return Kind == Ascii; }
   bool isWide() const { return Kind == Wide; }
   bool isUTF8() const { return Kind == UTF8; }
@@ -1323,6 +1367,7 @@ public:
         return true;
     return false;
   }
+  
   /// getNumConcatenated - Get the number of string literal tokens that were
   /// concatenated in translation phase #6 to form this string literal.
   unsigned getNumConcatenated() const { return NumConcatenated; }
diff --git a/include/clang/Lex/LiteralSupport.h b/include/clang/Lex/LiteralSupport.h
index 6f378041ae..635d3cb67e 100644
--- a/include/clang/Lex/LiteralSupport.h
+++ b/include/clang/Lex/LiteralSupport.h
@@ -189,11 +189,12 @@ public:
   /// checking of the string literal and emit errors and warnings.
   unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const;
 
-  bool isAscii() { return Kind == tok::string_literal; }
-  bool isWide() { return Kind == tok::wide_string_literal; }
-  bool isUTF8() { return Kind == tok::utf8_string_literal; }
-  bool isUTF16() { return Kind == tok::utf16_string_literal; }
-  bool isUTF32() { return Kind == tok::utf32_string_literal; }
+  bool isAscii() const { return Kind == tok::string_literal; }
+  bool isWide() const { return Kind == tok::wide_string_literal; }
+  bool isUTF8() const { return Kind == tok::utf8_string_literal; }
+  bool isUTF16() const { return Kind == tok::utf16_string_literal; }
+  bool isUTF32() const { return Kind == tok::utf32_string_literal; }
+  bool isPascal() const { return Pascal; }
 
 private:
   void init(const Token *StringToks, unsigned NumStringToks);
diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp
index 96a8125e18..3239973688 100644
--- a/lib/AST/Expr.cpp
+++ b/lib/AST/Expr.cpp
@@ -29,6 +29,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
+#include <cstring>
 using namespace clang;
 
 /// isKnownToHaveBooleanValue - Return true if this is an integer expression
@@ -482,6 +483,29 @@ double FloatingLiteral::getValueAsApproximateDouble() const {
   return V.convertToDouble();
 }
 
+int StringLiteral::mapCharByteWidth(TargetInfo const &target,StringKind k) {
+  int CharByteWidth;
+  switch(k) {
+    case Ascii:
+    case UTF8:
+      CharByteWidth = target.getCharWidth();
+      break;
+    case Wide:
+      CharByteWidth = target.getWCharWidth();
+      break;
+    case UTF16:
+      CharByteWidth = target.getChar16Width();
+      break;
+    case UTF32:
+      CharByteWidth = target.getChar32Width();
+  }
+  assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple");
+  CharByteWidth /= 8;
+  assert((CharByteWidth==1 || CharByteWidth==2 || CharByteWidth==4)
+         && "character byte widths supported are 1, 2, and 4 only");
+  return CharByteWidth;
+}
+
 StringLiteral *StringLiteral::Create(ASTContext &C, StringRef Str,
                                      StringKind Kind, bool Pascal, QualType Ty,
                                      const SourceLocation *Loc,
@@ -494,12 +518,8 @@ StringLiteral *StringLiteral::Create(ASTContext &C, StringRef Str,
   StringLiteral *SL = new (Mem) StringLiteral(Ty);
 
   // OPTIMIZE: could allocate this appended to the StringLiteral.
-  char *AStrData = new (C, 1) char[Str.size()];
-  memcpy(AStrData, Str.data(), Str.size());
-  SL->StrData = AStrData;
-  SL->ByteLength = Str.size();
-  SL->Kind = Kind;
-  SL->IsPascal = Pascal;
+  SL->setString(C,Str,Kind,Pascal);
+
   SL->TokLocs[0] = Loc[0];
   SL->NumConcatenated = NumStrs;
 
@@ -513,17 +533,46 @@ StringLiteral *StringLiteral::CreateEmpty(ASTContext &C, unsigned NumStrs) {
                          sizeof(SourceLocation)*(NumStrs-1),
                          llvm::alignOf<StringLiteral>());
   StringLiteral *SL = new (Mem) StringLiteral(QualType());
-  SL->StrData = 0;
-  SL->ByteLength = 0;
+  SL->CharByteWidth = 0;
+  SL->Length = 0;
   SL->NumConcatenated = NumStrs;
   return SL;
 }
 
-void StringLiteral::setString(ASTContext &C, StringRef Str) {
-  char *AStrData = new (C, 1) char[Str.size()];
-  memcpy(AStrData, Str.data(), Str.size());
-  StrData = AStrData;
-  ByteLength = Str.size();
+void StringLiteral::setString(ASTContext &C, StringRef Str,
+                              StringKind Kind, bool IsPascal) {
+  //FIXME: we assume that the string data comes from a target that uses the same
+  // code unit size and endianess for the type of string.
+  this->Kind = Kind;
+  this->IsPascal = IsPascal;
+  
+  CharByteWidth = mapCharByteWidth(C.getTargetInfo(),Kind);
+  assert((Str.size()%CharByteWidth == 0)
+         && "size of data must be multiple of CharByteWidth");
+  Length = Str.size()/CharByteWidth;
+
+  switch(CharByteWidth) {
+    case 1: {
+      char *AStrData = new (C) char[Length];
+      std::memcpy(AStrData,Str.data(),Str.size());
+      StrData.asChar = AStrData;
+      break;
+    }
+    case 2: {
+      uint16_t *AStrData = new (C) uint16_t[Length];
+      std::memcpy(AStrData,Str.data(),Str.size());
+      StrData.asUInt16 = AStrData;
+      break;
+    }
+    case 4: {
+      uint32_t *AStrData = new (C) uint32_t[Length];
+      std::memcpy(AStrData,Str.data(),Str.size());
+      StrData.asUInt32 = AStrData;
+      break;
+    }
+    default:
+      assert(false && "unsupported CharByteWidth");
+  }
 }
 
 /// getLocationOfByte - Return a source location that points to the specified
diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp
index 0622c10165..889cdd8f09 100644
--- a/lib/CodeGen/CGExprConstant.cpp
+++ b/lib/CodeGen/CGExprConstant.cpp
@@ -817,13 +817,7 @@ public:
   }
 
   llvm::Constant *VisitStringLiteral(StringLiteral *E) {
-    assert(!E->getType()->isPointerType() && "Strings are always arrays");
-
-    // This must be a string initializing an array in a static initializer.
-    // Don't emit it as the address of the string, emit the string data itself
-    // as an inline array.
-    return llvm::ConstantArray::get(VMContext,
-                                    CGM.GetStringForStringLiteral(E), false);
+    return CGM.GetConstantArrayFromStringLiteral(E);
   }
 
   llvm::Constant *VisitObjCEncodeExpr(ObjCEncodeExpr *E) {
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index c796e0daa9..0905c4b283 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -2037,6 +2037,8 @@ QualType CodeGenModule::getObjCFastEnumerationStateType() {
 /// GetStringForStringLiteral - Return the appropriate bytes for a
 /// string literal, properly padded to match the literal type.
 std::string CodeGenModule::GetStringForStringLiteral(const StringLiteral *E) {
+  assert((E->isAscii() || E->isUTF8())
+         && "Use GetConstantArrayFromStringLiteral for wide strings");
   const ASTContext &Context = getContext();
   const ConstantArrayType *CAT =
     Context.getAsConstantArrayType(E->getType());
@@ -2045,27 +2047,44 @@ std::string CodeGenModule::GetStringForStringLiteral(const StringLiteral *E) {
   // Resize the string to the right size.
   uint64_t RealLen = CAT->getSize().getZExtValue();
 
-  switch (E->getKind()) {
-  case StringLiteral::Ascii:
-  case StringLiteral::UTF8:
-    break;
-  case StringLiteral::Wide:
-    RealLen *= Context.getTargetInfo().getWCharWidth() / Context.getCharWidth();
-    break;
-  case StringLiteral::UTF16:
-    RealLen *= Context.getTargetInfo().getChar16Width() / Context.getCharWidth();
-    break;
-  case StringLiteral::UTF32:
-    RealLen *= Context.getTargetInfo().getChar32Width() / Context.getCharWidth();
-    break;
-  }
-
   std::string Str = E->getString().str();
   Str.resize(RealLen, '\0');
 
   return Str;
 }
 
+llvm::Constant *
+CodeGenModule::GetConstantArrayFromStringLiteral(const StringLiteral *E) {
+  assert(!E->getType()->isPointerType() && "Strings are always arrays");
+  
+  // Don't emit it as the address of the string, emit the string data itself
+  // as an inline array.
+  if (E->getCharByteWidth()==1) {
+    return llvm::ConstantArray::get(VMContext,
+                                    GetStringForStringLiteral(E), false);
+  } else {
+    llvm::ArrayType *AType =
+      cast<llvm::ArrayType>(getTypes().ConvertType(E->getType()));
+    llvm::Type *ElemTy = AType->getElementType();
+    unsigned NumElements = AType->getNumElements();
+    std::vector<llvm::Constant*> Elts;
+    Elts.reserve(NumElements);
+    
+    for(unsigned i=0;i<E->getLength();++i) {
+      unsigned value = E->getCodeUnit(i);
+      llvm::Constant *C = llvm::ConstantInt::get(ElemTy,value,false);
+      Elts.push_back(C);
+    }
+    for(unsigned i=E->getLength();i<NumElements;++i) {
+      llvm::Constant *C = llvm::ConstantInt::get(ElemTy,0,false);
+      Elts.push_back(C);
+    }
+    
+    return llvm::ConstantArray::get(AType, Elts);
+  }
+
+}
+
 /// GetAddrOfConstantStringFromLiteral - Return a pointer to a
 /// constant array for the given string literal.
 llvm::Constant *
@@ -2073,15 +2092,23 @@ CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S) {
   // FIXME: This can be more efficient.
   // FIXME: We shouldn't need to bitcast the constant in the wide string case.
   CharUnits Align = getContext().getTypeAlignInChars(S->getType());
-  llvm::Constant *C = GetAddrOfConstantString(GetStringForStringLiteral(S),
-                                              /* GlobalName */ 0,
-                                              Align.getQuantity());
-  if (S->isWide() || S->isUTF16() || S->isUTF32()) {
-    llvm::Type *DestTy =
-        llvm::PointerType::getUnqual(getTypes().ConvertType(S->getType()));
-    C = llvm::ConstantExpr::getBitCast(C, DestTy);
-  }
-  return C;
+  if (S->isAscii() || S->isUTF8()) {
+    return GetAddrOfConstantString(GetStringForStringLiteral(S),
+                                   /* GlobalName */ 0,
+                                   Align.getQuantity());
+  }
+
+  // FIXME: the following does not memoize wide strings
+  llvm::Constant *C = GetConstantArrayFromStringLiteral(S);
+  llvm::GlobalVariable *GV =
+    new llvm::GlobalVariable(getModule(),C->getType(),
+                             !Features.WritableStrings,
+                             llvm::GlobalValue::PrivateLinkage,
+                             C,".str");
+  GV->setAlignment(Align.getQuantity());
+  GV->setUnnamedAddr(true);
+  
+  return GV;
 }
 
 /// GetAddrOfConstantStringFromObjCEncode - Return a pointer to a constant
diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h
index ea2e177605..0ce698ae9f 100644
--- a/lib/CodeGen/CodeGenModule.h
+++ b/lib/CodeGen/CodeGenModule.h
@@ -565,6 +565,10 @@ public:
   /// -fconstant-string-class=class_name option.
   llvm::Constant *GetAddrOfConstantString(const StringLiteral *Literal);
 
+  /// GetConstantArrayFromStringLiteral - Return a constant array for the given
+  /// string.
+  llvm::Constant *GetConstantArrayFromStringLiteral(const StringLiteral *E);
+
   /// GetAddrOfConstantStringFromLiteral - Return a pointer to a constant array
   /// for the given string literal.
   llvm::Constant *GetAddrOfConstantStringFromLiteral(const StringLiteral *S);
diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp
index 21d0309377..61766a88c0 100644
--- a/lib/Sema/SemaExpr.cpp
+++ b/lib/Sema/SemaExpr.cpp
@@ -1141,7 +1141,7 @@ Sema::ActOnStringLiteral(const Token *StringToks, unsigned NumStringToks) {
     StrTy = Context.Char16Ty;
   else if (Literal.isUTF32())
     StrTy = Context.Char32Ty;
-  else if (Literal.Pascal)
+  else if (Literal.isPascal())
     StrTy = Context.UnsignedCharTy;
 
   StringLiteral::StringKind Kind = StringLiteral::Ascii;
diff --git a/lib/Serialization/ASTReaderStmt.cpp b/lib/Serialization/ASTReaderStmt.cpp
index 87912af461..e57ab1937c 100644
--- a/lib/Serialization/ASTReaderStmt.cpp
+++ b/lib/Serialization/ASTReaderStmt.cpp
@@ -372,12 +372,13 @@ void ASTStmtReader::VisitStringLiteral(StringLiteral *E) {
   assert(Record[Idx] == E->getNumConcatenated() &&
          "Wrong number of concatenated tokens!");
   ++Idx;
-  E->Kind = static_cast<StringLiteral::StringKind>(Record[Idx++]);
-  E->IsPascal = Record[Idx++];
+  StringLiteral::StringKind kind =
+        static_cast<StringLiteral::StringKind>(Record[Idx++]);
+  bool isPascal = Record[Idx++];
 
   // Read string data
   llvm::SmallString<16> Str(&Record[Idx], &Record[Idx] + Len);
-  E->setString(Reader.getContext(), Str.str());
+  E->setString(Reader.getContext(), Str.str(), kind, isPascal);
   Idx += Len;
 
   // Read source locations
diff --git a/lib/Serialization/ASTWriterStmt.cpp b/lib/Serialization/ASTWriterStmt.cpp
index 0721c299a1..61570a880a 100644
--- a/lib/Serialization/ASTWriterStmt.cpp
+++ b/lib/Serialization/ASTWriterStmt.cpp
@@ -331,7 +331,7 @@ void ASTStmtWriter::VisitStringLiteral(StringLiteral *E) {
   // StringLiteral. However, we can't do so now because we have no
   // provision for coping with abbreviations when we're jumping around
   // the AST file during deserialization.
-  Record.append(E->getString().begin(), E->getString().end());
+  Record.append(E->getBytes().begin(), E->getBytes().end());
   for (unsigned I = 0, N = E->getNumConcatenated(); I != N; ++I)
     Writer.AddSourceLocation(E->getStrTokenLoc(I), Record);
   Code = serialization::EXPR_STRING_LITERAL;
diff --git a/test/CodeGen/global-init.c b/test/CodeGen/global-init.c
index 074c2a065a..dab5a07d61 100644
--- a/test/CodeGen/global-init.c
+++ b/test/CodeGen/global-init.c
@@ -32,7 +32,7 @@ struct ManyFields FewInits = {1, 2};
 
 
 // PR6766
-// CHECK: @l = global { [24 x i8], i32 } { [24 x i8] c"f\00\00\00o\00\00\00o\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", i32 1 }
+// CHECK: @l = global %struct.K { [6 x i32] [i32 102, i32 111, i32 111, i32 0, i32 0, i32 0], i32 1 }
 typedef __WCHAR_TYPE__ wchar_t;
 struct K {
   wchar_t L[6];
diff --git a/test/CodeGen/pascal-wchar-string.c b/test/CodeGen/pascal-wchar-string.c
index a6b619643e..626fc99f15 100644
--- a/test/CodeGen/pascal-wchar-string.c
+++ b/test/CodeGen/pascal-wchar-string.c
@@ -29,8 +29,8 @@ int main(int argc, char* argv[])
         return 0;
 }
 
-// CHECK: c"\03\00b\00a\00r\00\00\00"
-// CHECK: c"\04\00g\00o\00r\00f\00\00\00"
+// CHECK: [i16 3, i16 98, i16 97, i16 114, i16 0]
+// CHECK: [i16 4, i16 103, i16 111, i16 114, i16 102, i16 0]
 
 
 // PR8856 - -fshort-wchar makes wchar_t be unsigned.
diff --git a/test/CodeGen/string-literal-short-wstring.c b/test/CodeGen/string-literal-short-wstring.c
index 770c3d4268..309ffd33b2 100644
--- a/test/CodeGen/string-literal-short-wstring.c
+++ b/test/CodeGen/string-literal-short-wstring.c
@@ -6,11 +6,11 @@ int main() {
   // CHECK: private unnamed_addr constant [10 x i8] c"\E1\84\A0\C8\A0\F4\82\80\B0\00", align 1
   char b[10] = "\u1120\u0220\U00102030";
 
-  // CHECK: private unnamed_addr constant [6 x i8] c"A\00B\00\00\00"
+  // CHECK: private unnamed_addr constant [3 x i16] [i16 65, i16 66, i16 0]
   const wchar_t *foo = L"AB";
 
   // This should convert to utf16.
-  // CHECK: private unnamed_addr constant [10 x i8] c" \11 \02\C8\DB0\DC\00\00"
+  // CHECK: private unnamed_addr constant [5 x i16] [i16 4384, i16 544, i16 -9272, i16 -9168, i16 0]
   const wchar_t *bar = L"\u1120\u0220\U00102030";
 
 
diff --git a/test/CodeGen/string-literal.c b/test/CodeGen/string-literal.c
index fa8f28a766..12d431a454 100644
--- a/test/CodeGen/string-literal.c
+++ b/test/CodeGen/string-literal.c
@@ -14,37 +14,37 @@ int main() {
   // CHECK-CPP0X: private unnamed_addr constant [10 x i8] c"\E1\84\A0\C8\A0\F4\82\80\B0\00", align 1
   char b[10] = "\u1120\u0220\U00102030";
 
-  // CHECK-C: private unnamed_addr constant [12 x i8] c"A\00\00\00B\00\00\00\00\00\00\00", align 4
-  // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"A\00\00\00B\00\00\00\00\00\00\00", align 4
+  // CHECK-C: private unnamed_addr constant [3 x i32] [i32 65, i32 66, i32 0], align 4
+  // CHECK-CPP0X: private unnamed_addr constant [3 x i32] [i32 65, i32 66, i32 0], align 4
   const wchar_t *foo = L"AB";
 
-  // CHECK-C: private unnamed_addr constant [12 x i8] c"4\12\00\00\0B\F0\10\00\00\00\00\00", align 4
-  // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"4\12\00\00\0B\F0\10\00\00\00\00\00", align 4
+  // CHECK-C: private unnamed_addr constant [3 x i32] [i32 4660, i32 1110027, i32 0], align 4
+  // CHECK-CPP0X: private unnamed_addr constant [3 x i32] [i32 4660, i32 1110027, i32 0], align 4
   const wchar_t *bar = L"\u1234\U0010F00B";
 
-  // CHECK-C: private unnamed_addr constant [12 x i8] c"4\12\00\00\0C\F0\10\00\00\00\00\00", align 4
-  // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"4\12\00\00\0C\F0\10\00\00\00\00\00", align 4
+  // CHECK-C: private unnamed_addr constant [3 x i32] [i32 4660, i32 1110028, i32 0], align 4
+  // CHECK-CPP0X: private unnamed_addr constant [3 x i32] [i32 4660, i32 1110028, i32 0], align 4
   const wchar_t *baz = L"\u1234" "\U0010F00C";
 
 #if __cplusplus >= 201103L
-  // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"C\00\00\00D\00\00\00\00\00\00\00", align 4
+  // CHECK-CPP0X: private unnamed_addr constant [3 x i32] [i32 67, i32 68, i32 0], align 4
   const char32_t *c = U"CD";
 
-  // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"5\12\00\00\0C\F0\10\00\00\00\00\00", align 4
+  // CHECK-CPP0X: private unnamed_addr constant [3 x i32] [i32 4661, i32 1110028, i32 0], align 4
   const char32_t *d = U"\u1235\U0010F00C";
 
-  // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"5\12\00\00\0B\F0\10\00\00\00\00\00", align 4
+  // CHECK-CPP0X: private unnamed_addr constant [3 x i32] [i32 4661, i32 1110027, i32 0], align 4
   const char32_t *o = "\u1235" U"\U0010F00B";
 
-  // CHECK-CPP0X: private unnamed_addr constant [6 x i8] c"E\00F\00\00\00", align 2
+  // CHECK-CPP0X: private unnamed_addr constant [3 x i16] [i16 69, i16 70, i16 0], align 2
   const char16_t *e = u"EF";
 
   // This should convert to utf16.
-  // CHECK-CPP0X: private unnamed_addr constant [10 x i8] c" \11 \02\C8\DB0\DC\00\00", align 2
+  // CHECK-CPP0X: private unnamed_addr constant [5 x i16] [i16 4384, i16 544, i16 -9272, i16 -9168, i16 0], align 2
   const char16_t *f = u"\u1120\u0220\U00102030";
 
   // This should convert to utf16.
-  // CHECK-CPP0X: private unnamed_addr constant [10 x i8] c" \11 \03\C8\DB0\DC\00\00", align 2
+  // CHECK-CPP0X: private unnamed_addr constant [5 x i16] [i16 4384, i16 800, i16 -9272, i16 -9168, i16 0], align 2
   const char16_t *p = u"\u1120\u0320" "\U00102030";
 
   // CHECK-CPP0X: private unnamed_addr constant [4 x i8] c"def\00", align 1
@@ -56,13 +56,13 @@ int main() {
   // CHECK-CPP0X: private unnamed_addr constant [4 x i8] c"jkl\00", align 1
   const char *i = u8R"bar(jkl)bar";
 
-  // CHECK-CPP0X: private unnamed_addr constant [6 x i8] c"G\00H\00\00\00", align 2
+  // CHECK-CPP0X: private unnamed_addr constant [3 x i16] [i16 71, i16 72, i16 0], align 2
   const char16_t *j = uR"foo(GH)foo";
 
-  // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"I\00\00\00J\00\00\00\00\00\00\00", align 4
+  // CHECK-CPP0X: private unnamed_addr constant [3 x i32] [i32 73, i32 74, i32 0], align 4
   const char32_t *k = UR"bar(IJ)bar";
 
-  // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"K\00\00\00L\00\00\00\00\00\00\00", align 4
+  // CHECK-CPP0X: private unnamed_addr constant [3 x i32] [i32 75, i32 76, i32 0], align 4
   const wchar_t *l = LR"bar(KL)bar";
 
   // CHECK-CPP0X: private unnamed_addr constant [9 x i8] c"abc\5Cndef\00", align 1
diff --git a/test/CodeGen/wchar-const.c b/test/CodeGen/wchar-const.c
index b672b15360..a9e7e523f9 100644
--- a/test/CodeGen/wchar-const.c
+++ b/test/CodeGen/wchar-const.c
@@ -14,8 +14,8 @@ typedef __WCHAR_TYPE__ wchar_t;
 #endif
 
 
-// CHECK-DAR: private unnamed_addr constant [72 x i8] c"
-// CHECK-WIN: private unnamed_addr constant [36 x i8] c"
+// CHECK-DAR: private unnamed_addr constant [18 x i32] [i32 84,
+// CHECK-WIN: private unnamed_addr constant [18 x i16] [i16 84,
 extern void foo(const wchar_t* p);
 int main (int argc, const char * argv[])
 {
diff --git a/test/CodeGenCXX/uncode-string.cpp b/test/CodeGenCXX/uncode-string.cpp
index e543149747..1d839992f9 100644
--- a/test/CodeGenCXX/uncode-string.cpp
+++ b/test/CodeGenCXX/uncode-string.cpp
@@ -3,4 +3,4 @@
 
 wchar_t s[] = L"\u2722";
 
-// CHECK: @s = global [8 x i8] c"\22'\00\00\00\00\00\00"
+// CHECK: @s = global [2 x i32] [i32 10018, i32 0], align 4
author	Eli Friedman <eli.friedman@gmail.com>	2011-11-01 02:23:42 +0000
committer	Eli Friedman <eli.friedman@gmail.com>	2011-11-01 02:23:42 +0000
commit	64f45a24b19eb89ff88f7c3ff0df9be8e861ac97 (patch)
tree	de9dd9c4244910961e8d67b69a0a83be4306154f
parent	f74a4587629615ffd13bd0724868f86ba8c8f27b (diff)