From 64f45a24b19eb89ff88f7c3ff0df9be8e861ac97 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 1 Nov 2011 02:23:42 +0000 Subject: Fix the representation of wide strings in the AST and IR so that it uses the native representation of integers for the elements. This fixes a bunch of nastiness involving treating wide strings as a series of bytes. Patch by Seth Cantrell. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@143417 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CodeGenModule.cpp | 75 +++++++++++++++++++++++++++++-------------- 1 file changed, 51 insertions(+), 24 deletions(-) (limited to 'lib/CodeGen/CodeGenModule.cpp') diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index c796e0daa9..0905c4b283 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -2037,6 +2037,8 @@ QualType CodeGenModule::getObjCFastEnumerationStateType() { /// GetStringForStringLiteral - Return the appropriate bytes for a /// string literal, properly padded to match the literal type. std::string CodeGenModule::GetStringForStringLiteral(const StringLiteral *E) { + assert((E->isAscii() || E->isUTF8()) + && "Use GetConstantArrayFromStringLiteral for wide strings"); const ASTContext &Context = getContext(); const ConstantArrayType *CAT = Context.getAsConstantArrayType(E->getType()); @@ -2045,27 +2047,44 @@ std::string CodeGenModule::GetStringForStringLiteral(const StringLiteral *E) { // Resize the string to the right size. 
uint64_t RealLen = CAT->getSize().getZExtValue(); - switch (E->getKind()) { - case StringLiteral::Ascii: - case StringLiteral::UTF8: - break; - case StringLiteral::Wide: - RealLen *= Context.getTargetInfo().getWCharWidth() / Context.getCharWidth(); - break; - case StringLiteral::UTF16: - RealLen *= Context.getTargetInfo().getChar16Width() / Context.getCharWidth(); - break; - case StringLiteral::UTF32: - RealLen *= Context.getTargetInfo().getChar32Width() / Context.getCharWidth(); - break; - } - std::string Str = E->getString().str(); Str.resize(RealLen, '\0'); return Str; } +llvm::Constant * +CodeGenModule::GetConstantArrayFromStringLiteral(const StringLiteral *E) { + assert(!E->getType()->isPointerType() && "Strings are always arrays"); + + // Don't emit it as the address of the string, emit the string data itself + // as an inline array. + if (E->getCharByteWidth()==1) { + return llvm::ConstantArray::get(VMContext, + GetStringForStringLiteral(E), false); + } else { + llvm::ArrayType *AType = + cast<llvm::ArrayType>(getTypes().ConvertType(E->getType())); + llvm::Type *ElemTy = AType->getElementType(); + unsigned NumElements = AType->getNumElements(); + std::vector<llvm::Constant*> Elts; + Elts.reserve(NumElements); + + for(unsigned i=0;i<E->getLength();++i) { + unsigned value = E->getCodeUnit(i); + llvm::Constant *C = llvm::ConstantInt::get(ElemTy,value,false); + Elts.push_back(C); + } + for(unsigned i=E->getLength();i<NumElements;++i) { [text lost in extraction: the body of this padding loop, the end of GetConstantArrayFromStringLiteral, and the header and leading context of the next hunk, up to the statement that declares Align and ends in] S->getType()); - llvm::Constant *C = GetAddrOfConstantString(GetStringForStringLiteral(S), - /* GlobalName */ 0, - Align.getQuantity()); - if (S->isWide() || S->isUTF16() || S->isUTF32()) { - llvm::Type *DestTy = - llvm::PointerType::getUnqual(getTypes().ConvertType(S->getType())); - C = llvm::ConstantExpr::getBitCast(C, DestTy); - } - return C; + if (S->isAscii() || S->isUTF8()) { + return GetAddrOfConstantString(GetStringForStringLiteral(S), + /* GlobalName */ 0, + Align.getQuantity()); + } + + // FIXME: the following does not memoize wide strings + llvm::Constant *C = 
GetConstantArrayFromStringLiteral(S); + llvm::GlobalVariable *GV = + new llvm::GlobalVariable(getModule(),C->getType(), + !Features.WritableStrings, + llvm::GlobalValue::PrivateLinkage, + C,".str"); + GV->setAlignment(Align.getQuantity()); + GV->setUnnamedAddr(true); + + return GV; } /// GetAddrOfConstantStringFromObjCEncode - Return a pointer to a constant -- cgit v1.2.3-70-g09d2