diff options
-rw-r--r-- | lib/CodeGen/CGExpr.cpp | 5 | ||||
-rw-r--r-- | lib/CodeGen/CGExprConstant.cpp | 8 | ||||
-rw-r--r-- | lib/CodeGen/CGObjC.cpp | 2 | ||||
-rw-r--r-- | lib/CodeGen/CGObjCGNU.cpp | 10 | ||||
-rw-r--r-- | lib/CodeGen/CodeGenModule.cpp | 46 | ||||
-rw-r--r-- | lib/CodeGen/CodeGenModule.h | 29 | ||||
-rw-r--r-- | test/CodeGen/c-strings.c | 34 |
7 files changed, 102 insertions, 32 deletions
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index 6bfd5cb309..e6bc5016e9 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -488,10 +488,7 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) { } LValue CodeGenFunction::EmitStringLiteralLValue(const StringLiteral *E) { - llvm::Constant *C = - CGM.GetAddrOfConstantString(CGM.getStringForStringLiteral(E)); - - return LValue::MakeAddr(C,0); + return LValue::MakeAddr(CGM.GetAddrOfConstantStringFromLiteral(E), 0); } LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) { diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp index 12620e7101..dfd047c89d 100644 --- a/lib/CodeGen/CGExprConstant.cpp +++ b/lib/CodeGen/CGExprConstant.cpp @@ -363,7 +363,7 @@ public: // Otherwise this must be a string initializing an array in a static // initializer. Don't emit it as the address of the string, emit the string // data itself as an inline array. - return llvm::ConstantArray::get(CGM.getStringForStringLiteral(E), false); + return llvm::ConstantArray::get(CGM.GetStringForStringLiteral(E), false); } llvm::Constant *VisitDeclRefExpr(DeclRefExpr *E) { @@ -762,10 +762,8 @@ public: "Taking the address of a vector component is illegal!"); return llvm::ConstantExpr::getGetElementPtr(Base, &Index, 1); } - case Expr::StringLiteralClass: { - StringLiteral *S = cast<StringLiteral>(E); - return CGM.GetAddrOfConstantString(CGM.getStringForStringLiteral(S)); - } + case Expr::StringLiteralClass: + return CGM.GetAddrOfConstantStringFromLiteral(cast<StringLiteral>(E)); case Expr::UnaryOperatorClass: { UnaryOperator *Exp = cast<UnaryOperator>(E); switch (Exp->getOpcode()) { diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp index 7270884572..2e400f4d2a 100644 --- a/lib/CodeGen/CGObjC.cpp +++ b/lib/CodeGen/CGObjC.cpp @@ -52,7 +52,7 @@ llvm::Value *CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E) { if (!strcmp(classname, "super")) { 
classname = E->getMethodDecl()->getClassInterface()->getName(); } - llvm::Value *ClassName = CGM.GetAddrOfConstantString(classname); + llvm::Value *ClassName = CGM.GetAddrOfConstantCString(classname); ClassName = Builder.CreateStructGEP(ClassName, 0); Receiver = Runtime.LookupClass(Builder, ClassName); } else if (const PredefinedExpr *PDE = diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp index 61552c4505..c4d236b54d 100644 --- a/lib/CodeGen/CGObjCGNU.cpp +++ b/lib/CodeGen/CGObjCGNU.cpp @@ -370,7 +370,7 @@ llvm::Constant *CGObjCGNU::GenerateMethodList(const std::string &ClassName, std::vector<llvm::Constant*> Elements; for (unsigned int i = 0, e = MethodTypes.size(); i < e; ++i) { Elements.clear(); - llvm::Constant *C = CGM.GetAddrOfConstantString(MethodSels[i].getName()); + llvm::Constant *C = CGM.GetAddrOfConstantCString(MethodSels[i].getName()); Elements.push_back(llvm::ConstantExpr::getGetElementPtr(C, Zeros, 2)); Elements.push_back( llvm::ConstantExpr::getGetElementPtr(MethodTypes[i], Zeros, 2)); @@ -581,8 +581,8 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { std::string TypeStr; Context.getObjCEncodingForMethodDecl(*iter, TypeStr); InstanceMethodNames.push_back( - CGM.GetAddrOfConstantString((*iter)->getSelector().getName())); - InstanceMethodTypes.push_back(CGM.GetAddrOfConstantString(TypeStr)); + CGM.GetAddrOfConstantCString((*iter)->getSelector().getName())); + InstanceMethodTypes.push_back(CGM.GetAddrOfConstantCString(TypeStr)); } // Collect information about class methods: llvm::SmallVector<llvm::Constant*, 16> ClassMethodNames; @@ -592,8 +592,8 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { std::string TypeStr; Context.getObjCEncodingForMethodDecl((*iter),TypeStr); ClassMethodNames.push_back( - CGM.GetAddrOfConstantString((*iter)->getSelector().getName())); - ClassMethodTypes.push_back(CGM.GetAddrOfConstantString(TypeStr)); + CGM.GetAddrOfConstantCString((*iter)->getSelector().getName())); + 
ClassMethodTypes.push_back(CGM.GetAddrOfConstantCString(TypeStr)); } llvm::Constant *ProtocolList = GenerateProtocolList(Protocols); diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index f575300b6d..c6b02f83b4 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -261,7 +261,7 @@ void CodeGenModule::EmitObjCCategoryImpl(const ObjCCategoryImplDecl *OCD) { InstanceMethodSels.push_back((*iter)->getSelector()); std::string TypeStr; Context.getObjCEncodingForMethodDecl(*iter,TypeStr); - InstanceMethodTypes.push_back(GetAddrOfConstantString(TypeStr)); + InstanceMethodTypes.push_back(GetAddrOfConstantCString(TypeStr)); } // Collect information about class methods @@ -272,7 +272,7 @@ void CodeGenModule::EmitObjCCategoryImpl(const ObjCCategoryImplDecl *OCD) { ClassMethodSels.push_back((*iter)->getSelector()); std::string TypeStr; Context.getObjCEncodingForMethodDecl(*iter,TypeStr); - ClassMethodTypes.push_back(GetAddrOfConstantString(TypeStr)); + ClassMethodTypes.push_back(GetAddrOfConstantCString(TypeStr)); } // Collect the names of referenced protocols @@ -325,13 +325,13 @@ void CodeGenModule::EmitObjCClassImplementation( for (ObjCInterfaceDecl::ivar_iterator iter = ClassDecl->ivar_begin(), endIter = ClassDecl->ivar_end() ; iter != endIter ; iter++) { // Store the name - IvarNames.push_back(GetAddrOfConstantString((*iter)->getName())); + IvarNames.push_back(GetAddrOfConstantCString((*iter)->getName())); // Get the type encoding for this ivar std::string TypeStr; llvm::SmallVector<const RecordType *, 8> EncodingRecordTypes; Context.getObjCEncodingForType((*iter)->getType(), TypeStr, EncodingRecordTypes); - IvarTypes.push_back(GetAddrOfConstantString(TypeStr)); + IvarTypes.push_back(GetAddrOfConstantCString(TypeStr)); // Get the offset int offset = (int)Layout->getElementOffset(getTypes().getLLVMFieldNo(*iter)); @@ -347,7 +347,7 @@ void CodeGenModule::EmitObjCClassImplementation( 
InstanceMethodSels.push_back((*iter)->getSelector()); std::string TypeStr; Context.getObjCEncodingForMethodDecl((*iter),TypeStr); - InstanceMethodTypes.push_back(GetAddrOfConstantString(TypeStr)); + InstanceMethodTypes.push_back(GetAddrOfConstantCString(TypeStr)); } // Collect information about class methods @@ -358,7 +358,7 @@ void CodeGenModule::EmitObjCClassImplementation( ClassMethodSels.push_back((*iter)->getSelector()); std::string TypeStr; Context.getObjCEncodingForMethodDecl((*iter),TypeStr); - ClassMethodTypes.push_back(GetAddrOfConstantString(TypeStr)); + ClassMethodTypes.push_back(GetAddrOfConstantCString(TypeStr)); } // Collect the names of referenced protocols llvm::SmallVector<std::string, 16> Protocols; @@ -888,9 +888,9 @@ GetAddrOfConstantCFString(const std::string &str) { return GV; } -/// getStringForStringLiteral - Return the appropriate bytes for a +/// GetStringForStringLiteral - Return the appropriate bytes for a /// string literal, properly padded to match the literal type. -std::string CodeGenModule::getStringForStringLiteral(const StringLiteral *E) { +std::string CodeGenModule::GetStringForStringLiteral(const StringLiteral *E) { assert(!E->isWide() && "FIXME: Wide strings not supported yet!"); const char *StrData = E->getStrData(); unsigned Len = E->getByteLength(); @@ -908,22 +908,37 @@ std::string CodeGenModule::getStringForStringLiteral(const StringLiteral *E) { return Str; } +/// GetAddrOfConstantStringFromLiteral - Return a pointer to a +/// constant array for the given string literal. +llvm::Constant * +CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S) { + // FIXME: This can be more efficient. + return GetAddrOfConstantString(GetStringForStringLiteral(S)); +} + /// GenerateWritableString -- Creates storage for a string literal. 
static llvm::Constant *GenerateStringLiteral(const std::string &str, bool constant, CodeGenModule &CGM) { - // Create Constant for this string literal - llvm::Constant *C = llvm::ConstantArray::get(str); + // Create Constant for this string literal. Don't add a '\0'. + llvm::Constant *C = llvm::ConstantArray::get(str, false); // Create a global variable for this string C = new llvm::GlobalVariable(C->getType(), constant, llvm::GlobalValue::InternalLinkage, C, ".str", &CGM.getModule()); + return C; } -/// CodeGenModule::GetAddrOfConstantString -- returns a pointer to the character -/// array containing the literal. The result is pointer to array type. +/// GetAddrOfConstantString - Returns a pointer to a character array +/// containing the literal. The contents are exactly that of the +/// given string, i.e. it will not be null terminated automatically; +/// see GetAddrOfConstantCString. Note that whether the result is +/// actually a pointer to an LLVM constant depends on +/// Features.WritableStrings. +/// +/// The result has pointer to array type. llvm::Constant *CodeGenModule::GetAddrOfConstantString(const std::string &str) { // Don't share any string literals if writable-strings is turned on. if (Features.WritableStrings) @@ -940,3 +955,10 @@ llvm::Constant *CodeGenModule::GetAddrOfConstantString(const std::string &str) { Entry.setValue(C); return C; } + +/// GetAddrOfConstantCString - Returns a pointer to a character +/// array containing the literal and a terminating '\0' +/// character. The result has pointer to array type. +llvm::Constant *CodeGenModule::GetAddrOfConstantCString(const std::string &str) { + return GetAddrOfConstantString(str + '\0'); +} diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h index a837b6369c..dd8ac1739f 100644 --- a/lib/CodeGen/CodeGenModule.h +++ b/lib/CodeGen/CodeGenModule.h @@ -143,15 +143,34 @@ public: /// "__builtin_fabsf", return a Function* for "fabsf". 
/// llvm::Function *getBuiltinLibFunction(unsigned BuiltinID); + + /// GetStringForStringLiteral - Return the appropriate bytes for a + /// string literal, properly padded to match the literal type. If + /// only the address of a constant is needed, consider using + /// GetAddrOfConstantStringFromLiteral. + std::string GetStringForStringLiteral(const StringLiteral *E); + llvm::Constant *GetAddrOfConstantCFString(const std::string& str); - /// getStringForStringLiteral - Return the appropriate bytes for a - /// string literal, properly padded to match the literal type. - std::string getStringForStringLiteral(const StringLiteral *E); + /// GetAddrOfConstantStringFromLiteral - Return a pointer to a + /// constant array for the given string literal. + llvm::Constant *GetAddrOfConstantStringFromLiteral(const StringLiteral *S); - /// GetAddrOfConstantString -- returns a pointer to the character - /// array containing the literal. The result is pointer to array type. + /// GetAddrOfConstantString - Returns a pointer to a character array + /// containing the literal. The contents are exactly that of the + /// given string, i.e. it will not be null terminated automatically; + /// see GetAddrOfConstantCString. Note that whether the result is + /// actually a pointer to an LLVM constant depends on + /// Features.WritableStrings. + /// + /// The result has pointer to array type. llvm::Constant *GetAddrOfConstantString(const std::string& str); + + /// GetAddrOfConstantCString - Returns a pointer to a character + /// array containing the literal and a terminating '\0' + /// character. The result has pointer to array type. 
+ llvm::Constant *GetAddrOfConstantCString(const std::string &str); + llvm::Function *getMemCpyFn(); llvm::Function *getMemMoveFn(); llvm::Function *getMemSetFn(); diff --git a/test/CodeGen/c-strings.c b/test/CodeGen/c-strings.c new file mode 100644 index 0000000000..baff4ddfc1 --- /dev/null +++ b/test/CodeGen/c-strings.c @@ -0,0 +1,34 @@ +// RUN: clang -emit-llvm -o %t %s && +// RUN: grep "hello" %t | count 3 && +// RUN: grep 'c"hello\\00"' %t | count 2 && +// RUN: grep 'c"hello\\00\\00\\00"' %t | count 1 + +/* Should be 3 hello strings: two globals (of different sizes); the rest + are shared. */ + +void f0() { + bar("hello"); +} + +void f1() { + static char *x = "hello"; + bar(x); +} + +void f2() { + static char x[] = "hello"; + bar(x); +} + +void f3() { + static char x[8] = "hello"; + bar(x); +} + +void f4() { + static struct s { + char *name; + } x = { "hello" }; + gaz(&x); +} + |