author    Lang Hames <lhames@gmail.com>  2013-02-11 23:44:11 +0000
committer Lang Hames <lhames@gmail.com>  2013-02-11 23:44:11 +0000
commit    5310859d0dcec2f3efbeb155ae1cfee14bdd2836 (patch)
tree      0b05d96abea6cea8a4cf305c262644ab17b43c5b /lib/CodeGen/CGClass.cpp
parent    6e8dfc7a6cbf6dec743aab4df7d55575c6945607 (diff)
When generating IR for default copy-constructors, copy-assignment operators,
move-constructors and move-assignment operators, use memcpy to copy adjacent
POD members.

Previously, classes with one or more non-POD members would fall back on
element-wise copies for all members, including POD members. This often
generated a lot of IR. Without padding metadata, it was rarely possible for
the LLVM optimizers to turn the element-wise copies into a memcpy.

This code hasn't yet received any serious tuning. I didn't see any serious
regressions on a self-hosted clang build, or in any of the nightly tests, but
I think it's important to get this out in the wild to get more testing.
Insights, feedback and comments welcome.

Many thanks to David Blaikie, Richard Smith, and especially John McCall for
their help and feedback on this work.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@174919 91177308-0d34-0410-b5e6-96231b3b80d8
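As a hypothetical illustration (the names below are invented, not taken from the
patch or its tests): a class with a mix of POD and non-POD members still needs a
non-trivial copy constructor, but its adjacent POD members can now be copied with
a single memcpy.

    #include <string>

    // Hypothetical example: 's' makes the copy constructor non-trivial, but the
    // implicitly-defined copy constructor can now copy the adjacent POD members
    // a, b and c with one memcpy instead of three element-wise copies, then
    // invoke std::string's copy constructor for 's'.
    struct Widget {
      int a;          // POD
      int b;          // POD
      double c;       // POD - adjacent to a and b, so all three are memcpy'd
      std::string s;  // non-POD member
    };

    Widget duplicate(const Widget &W) { return W; }  // uses the implicit copy ctor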
Diffstat (limited to 'lib/CodeGen/CGClass.cpp')
-rw-r--r--  lib/CodeGen/CGClass.cpp  359
1 file changed, 358 insertions(+), 1 deletion(-)
diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp
index ce32acd0da..6bee452cb0 100644
--- a/lib/CodeGen/CGClass.cpp
+++ b/lib/CodeGen/CGClass.cpp
@@ -18,6 +18,7 @@
#include "clang/AST/EvaluatedExprVisitor.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/StmtCXX.h"
+#include "clang/Basic/TargetBuiltins.h"
#include "clang/Frontend/CodeGenOptions.h"
using namespace clang;
@@ -742,6 +743,342 @@ void CodeGenFunction::EmitConstructorBody(FunctionArgList &Args) {
ExitCXXTryStmt(*cast<CXXTryStmt>(Body), true);
}
+namespace {
+ class FieldMemcpyizer {
+ public:
+ FieldMemcpyizer(CodeGenFunction &CGF, const CXXRecordDecl *ClassDecl,
+ const VarDecl *SrcRec)
+ : CGF(CGF), ClassDecl(ClassDecl), SrcRec(SrcRec),
+ RecLayout(CGF.getContext().getASTRecordLayout(ClassDecl)),
+ FirstField(0), LastField(0), FirstFieldOffset(0), LastFieldOffset(0),
+ LastAddedFieldIndex(0) { }
+
+ static bool isMemcpyableField(FieldDecl *F) {
+ Qualifiers Qual = F->getType().getQualifiers();
+ if (Qual.hasVolatile() || Qual.hasObjCLifetime())
+ return false;
+ return true;
+ }
+
+ void addMemcpyableField(FieldDecl *F) {
+ if (FirstField == 0)
+ addInitialField(F);
+ else
+ addNextField(F);
+ }
+
+ CharUnits getMemcpySize() const {
+ unsigned LastFieldSize =
+ CGF.getContext().getTypeInfo(LastField->getType()).first;
+ uint64_t MemcpySizeBits =
+ LastFieldOffset + LastFieldSize - FirstFieldOffset +
+ CGF.getContext().getCharWidth() - 1;
+ CharUnits MemcpySize =
+ CGF.getContext().toCharUnitsFromBits(MemcpySizeBits);
+ return MemcpySize;
+ }
+
+ void emitMemcpy() {
+ // Give the subclass a chance to bail out if it feels the memcpy isn't
+ // worth it (e.g. it hasn't aggregated enough data).
+ if (FirstField == 0) {
+ return;
+ }
+
+ unsigned FirstFieldAlign =
+ CGF.getContext().getTypeInfo(FirstField->getType()).second;
+ assert(FirstFieldOffset % FirstFieldAlign == 0 && "Bad field alignment.");
+ CharUnits Alignment =
+ CGF.getContext().toCharUnitsFromBits(FirstFieldAlign);
+ CharUnits MemcpySize = getMemcpySize();
+ QualType RecordTy = CGF.getContext().getTypeDeclType(ClassDecl);
+ llvm::Value *ThisPtr = CGF.LoadCXXThis();
+ LValue DestLV = CGF.MakeNaturalAlignAddrLValue(ThisPtr, RecordTy);
+ LValue Dest = CGF.EmitLValueForFieldInitialization(DestLV, FirstField);
+ llvm::Value *SrcPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(SrcRec));
+ LValue SrcLV = CGF.MakeNaturalAlignAddrLValue(SrcPtr, RecordTy);
+ LValue Src = CGF.EmitLValueForFieldInitialization(SrcLV, FirstField);
+
+ emitMemcpyIR(Dest.isBitField() ? Dest.getBitFieldAddr() : Dest.getAddress(),
+ Src.isBitField() ? Src.getBitFieldAddr() : Src.getAddress(),
+ MemcpySize, Alignment);
+ reset();
+ }
+
+ void reset() {
+ FirstField = 0;
+ }
+
+ protected:
+ CodeGenFunction &CGF;
+ const CXXRecordDecl *ClassDecl;
+
+ private:
+
+ void emitMemcpyIR(llvm::Value *DestPtr, llvm::Value *SrcPtr,
+ CharUnits Size, CharUnits Alignment) {
+ llvm::PointerType *DPT = cast<llvm::PointerType>(DestPtr->getType());
+ llvm::Type *DBP =
+ llvm::Type::getInt8PtrTy(CGF.getLLVMContext(), DPT->getAddressSpace());
+ DestPtr = CGF.Builder.CreateBitCast(DestPtr, DBP);
+
+ llvm::PointerType *SPT = cast<llvm::PointerType>(SrcPtr->getType());
+ llvm::Type *SBP =
+ llvm::Type::getInt8PtrTy(CGF.getLLVMContext(), SPT->getAddressSpace());
+ SrcPtr = CGF.Builder.CreateBitCast(SrcPtr, SBP);
+
+ CGF.Builder.CreateMemCpy(DestPtr, SrcPtr, Size.getQuantity(),
+ Alignment.getQuantity());
+ }
+
+ void addInitialField(FieldDecl *F) {
+ FirstField = F;
+ LastField = F;
+ FirstFieldOffset = RecLayout.getFieldOffset(F->getFieldIndex());
+ LastFieldOffset = FirstFieldOffset;
+ LastAddedFieldIndex = F->getFieldIndex();
+ return;
+ }
+
+ void addNextField(FieldDecl *F) {
+ assert(F->getFieldIndex() == LastAddedFieldIndex + 1 &&
+ "Cannot aggregate non-contiguous fields.");
+ LastAddedFieldIndex = F->getFieldIndex();
+
+ // The 'first' and 'last' fields are chosen by offset, rather than field
+ // index. This allows the code to support bitfields, as well as regular
+ // fields.
+ uint64_t FOffset = RecLayout.getFieldOffset(F->getFieldIndex());
+ if (FOffset < FirstFieldOffset) {
+ FirstField = F;
+ FirstFieldOffset = FOffset;
+ } else if (FOffset > LastFieldOffset) {
+ LastField = F;
+ LastFieldOffset = FOffset;
+ }
+ }
+
+ const VarDecl *SrcRec;
+ const ASTRecordLayout &RecLayout;
+ FieldDecl *FirstField;
+ FieldDecl *LastField;
+ uint64_t FirstFieldOffset, LastFieldOffset;
+ unsigned LastAddedFieldIndex;
+ };
+
+ class ConstructorMemcpyizer : public FieldMemcpyizer {
+ private:
+
+ /// Get source argument for copy constructor. Returns null if not a copy
+ /// constructor.
+ static const VarDecl* getTrivialCopySource(const CXXConstructorDecl *CD,
+ FunctionArgList &Args) {
+ if (CD->isCopyOrMoveConstructor() && CD->isImplicitlyDefined())
+ return Args[Args.size() - 1];
+ return 0;
+ }
+
+ // Returns true if a CXXCtorInitializer represents a member initialization
+ // that can be rolled into a memcpy.
+ bool isMemberInitMemcpyable(CXXCtorInitializer *MemberInit) const {
+ if (!MemcpyableCtor)
+ return false;
+ FieldDecl *Field = MemberInit->getMember();
+ assert(Field != 0 && "No field for member init.");
+ QualType FieldType = Field->getType();
+ CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(MemberInit->getInit());
+
+ // Bail out on non-POD, non-trivially-constructible members.
+ if (!(CE && CE->getConstructor()->isTrivial()) &&
+ !(FieldType.isTriviallyCopyableType(CGF.getContext()) ||
+ FieldType->isReferenceType()))
+ return false;
+
+ // Bail out on volatile fields.
+ if (!isMemcpyableField(Field))
+ return false;
+
+ // Otherwise we're good.
+ return true;
+ }
+
+ public:
+ ConstructorMemcpyizer(CodeGenFunction &CGF, const CXXConstructorDecl *CD,
+ FunctionArgList &Args)
+ : FieldMemcpyizer(CGF, CD->getParent(), getTrivialCopySource(CD, Args)),
+ ConstructorDecl(CD),
+ MemcpyableCtor(CD->isImplicitlyDefined() &&
+ CD->isCopyOrMoveConstructor() &&
+ CGF.getLangOpts().getGC() == LangOptions::NonGC),
+ Args(Args) { }
+
+ void addMemberInitializer(CXXCtorInitializer *MemberInit) {
+ if (isMemberInitMemcpyable(MemberInit)) {
+ AggregatedInits.push_back(MemberInit);
+ addMemcpyableField(MemberInit->getMember());
+ } else {
+ emitAggregatedInits();
+ EmitMemberInitializer(CGF, ConstructorDecl->getParent(), MemberInit,
+ ConstructorDecl, Args);
+ }
+ }
+
+ void emitAggregatedInits() {
+ if (AggregatedInits.size() <= 1) {
+ // This memcpy is too small to be worthwhile. Fall back on default
+ // codegen.
+ for (unsigned i = 0; i < AggregatedInits.size(); ++i) {
+ EmitMemberInitializer(CGF, ConstructorDecl->getParent(),
+ AggregatedInits[i], ConstructorDecl, Args);
+ }
+ reset();
+ return;
+ }
+
+ pushEHDestructors();
+ emitMemcpy();
+ AggregatedInits.clear();
+ }
+
+ void pushEHDestructors() {
+ llvm::Value *ThisPtr = CGF.LoadCXXThis();
+ QualType RecordTy = CGF.getContext().getTypeDeclType(ClassDecl);
+ LValue LHS = CGF.MakeNaturalAlignAddrLValue(ThisPtr, RecordTy);
+
+ for (unsigned i = 0; i < AggregatedInits.size(); ++i) {
+ QualType FieldType = AggregatedInits[i]->getMember()->getType();
+ QualType::DestructionKind dtorKind = FieldType.isDestructedType();
+ if (CGF.needsEHCleanup(dtorKind))
+ CGF.pushEHDestroy(dtorKind, LHS.getAddress(), FieldType);
+ }
+ }
+
+ void finish() {
+ emitAggregatedInits();
+ }
+
+ private:
+ const CXXConstructorDecl *ConstructorDecl;
+ bool MemcpyableCtor;
+ FunctionArgList &Args;
+ SmallVector<CXXCtorInitializer*, 16> AggregatedInits;
+ };
+
+ class AssignmentMemcpyizer : public FieldMemcpyizer {
+ private:
+
+ // Returns the memcpyable field copied by the given statement, if one
+ // exists. Otherwise returns null.
+ FieldDecl* getMemcpyableField(Stmt *S) {
+ if (!AssignmentsMemcpyable)
+ return 0;
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(S)) {
+ // Recognise trivial assignments.
+ if (BO->getOpcode() != BO_Assign)
+ return 0;
+ MemberExpr *ME = dyn_cast<MemberExpr>(BO->getLHS());
+ if (!ME)
+ return 0;
+ FieldDecl *Field = dyn_cast<FieldDecl>(ME->getMemberDecl());
+ if (!Field || !isMemcpyableField(Field))
+ return 0;
+ Stmt *RHS = BO->getRHS();
+ if (ImplicitCastExpr *EC = dyn_cast<ImplicitCastExpr>(RHS))
+ RHS = EC->getSubExpr();
+ if (!RHS)
+ return 0;
+ MemberExpr *ME2 = dyn_cast<MemberExpr>(RHS);
+ if (!ME2 || dyn_cast<FieldDecl>(ME2->getMemberDecl()) != Field)
+ return 0;
+ return Field;
+ } else if (CXXMemberCallExpr *MCE = dyn_cast<CXXMemberCallExpr>(S)) {
+ CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(MCE->getCalleeDecl());
+ if (!(MD && (MD->isCopyAssignmentOperator() ||
+ MD->isMoveAssignmentOperator()) &&
+ MD->isTrivial()))
+ return 0;
+ MemberExpr *IOA = dyn_cast<MemberExpr>(MCE->getImplicitObjectArgument());
+ if (!IOA)
+ return 0;
+ FieldDecl *Field = dyn_cast<FieldDecl>(IOA->getMemberDecl());
+ if (!Field || !isMemcpyableField(Field))
+ return 0;
+ MemberExpr *Arg0 = dyn_cast<MemberExpr>(MCE->getArg(0));
+ if (!Arg0 || Field != dyn_cast<FieldDecl>(Arg0->getMemberDecl()))
+ return 0;
+ return Field;
+ } else if (CallExpr *CE = dyn_cast<CallExpr>(S)) {
+ FunctionDecl *FD = dyn_cast<FunctionDecl>(CE->getCalleeDecl());
+ if (!FD || FD->getBuiltinID() != Builtin::BI__builtin_memcpy)
+ return 0;
+ Expr *DstPtr = CE->getArg(0);
+ if (ImplicitCastExpr *DC = dyn_cast<ImplicitCastExpr>(DstPtr))
+ DstPtr = DC->getSubExpr();
+ UnaryOperator *DUO = dyn_cast<UnaryOperator>(DstPtr);
+ if (!DUO || DUO->getOpcode() != UO_AddrOf)
+ return 0;
+ MemberExpr *ME = dyn_cast<MemberExpr>(DUO->getSubExpr());
+ if (!ME)
+ return 0;
+ FieldDecl *Field = dyn_cast<FieldDecl>(ME->getMemberDecl());
+ if (!Field || !isMemcpyableField(Field))
+ return 0;
+ Expr *SrcPtr = CE->getArg(1);
+ if (ImplicitCastExpr *SC = dyn_cast<ImplicitCastExpr>(SrcPtr))
+ SrcPtr = SC->getSubExpr();
+ UnaryOperator *SUO = dyn_cast<UnaryOperator>(SrcPtr);
+ if (!SUO || SUO->getOpcode() != UO_AddrOf)
+ return 0;
+ MemberExpr *ME2 = dyn_cast<MemberExpr>(SUO->getSubExpr());
+ if (!ME2 || Field != dyn_cast<FieldDecl>(ME2->getMemberDecl()))
+ return 0;
+ return Field;
+ }
+
+ return 0;
+ }
+
+ bool AssignmentsMemcpyable;
+ SmallVector<Stmt*, 16> AggregatedStmts;
+
+ public:
+
+ AssignmentMemcpyizer(CodeGenFunction &CGF, const CXXMethodDecl *AD,
+ FunctionArgList &Args)
+ : FieldMemcpyizer(CGF, AD->getParent(), Args[Args.size() - 1]),
+ AssignmentsMemcpyable(CGF.getLangOpts().getGC() == LangOptions::NonGC) {
+ assert(Args.size() == 2);
+ }
+
+ void emitAssignment(Stmt *S) {
+ FieldDecl *F = getMemcpyableField(S);
+ if (F) {
+ addMemcpyableField(F);
+ AggregatedStmts.push_back(S);
+ } else {
+ emitAggregatedStmts();
+ CGF.EmitStmt(S);
+ }
+ }
+
+ void emitAggregatedStmts() {
+ if (AggregatedStmts.size() <= 1) {
+ for (unsigned i = 0; i < AggregatedStmts.size(); ++i)
+ CGF.EmitStmt(AggregatedStmts[i]);
+ reset();
+ }
+
+ emitMemcpy();
+ AggregatedStmts.clear();
+ }
+
+ void finish() {
+ emitAggregatedStmts();
+ }
+ };
+
+}
+
/// EmitCtorPrologue - This routine generates necessary code to initialize
/// base classes and non-static data members belonging to this constructor.
void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD,
@@ -770,8 +1107,10 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD,
InitializeVTablePointers(ClassDecl);
+ ConstructorMemcpyizer CM(*this, CD, Args);
for (unsigned I = 0, E = MemberInitializers.size(); I != E; ++I)
- EmitMemberInitializer(*this, ClassDecl, MemberInitializers[I], CD, Args);
+ CM.addMemberInitializer(MemberInitializers[I]);
+ CM.finish();
}
static bool
@@ -940,6 +1279,24 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) {
ExitCXXTryStmt(*cast<CXXTryStmt>(Body), true);
}
+void CodeGenFunction::emitImplicitAssignmentOperatorBody(FunctionArgList &Args) {
+ const CXXMethodDecl *AssignOp = cast<CXXMethodDecl>(CurGD.getDecl());
+ const Stmt *RootS = AssignOp->getBody();
+ assert(isa<CompoundStmt>(RootS) &&
+ "Body of an implicit assignment operator should be compound stmt.");
+ const CompoundStmt *RootCS = cast<CompoundStmt>(RootS);
+
+ LexicalScope Scope(*this, RootCS->getSourceRange());
+
+ AssignmentMemcpyizer AM(*this, AssignOp, Args);
+ for (CompoundStmt::const_body_iterator I = RootCS->body_begin(),
+ E = RootCS->body_end();
+ I != E; ++I) {
+ AM.emitAssignment(*I);
+ }
+ AM.finish();
+}
+
namespace {
/// Call the operator delete associated with the current destructor.
struct CallDtorDelete : EHScopeStack::Cleanup {