diff options
author | Lang Hames <lhames@gmail.com> | 2013-02-11 23:44:11 +0000 |
---|---|---|
committer | Lang Hames <lhames@gmail.com> | 2013-02-11 23:44:11 +0000 |
commit | 5310859d0dcec2f3efbeb155ae1cfee14bdd2836 (patch) | |
tree | 0b05d96abea6cea8a4cf305c262644ab17b43c5b /lib/CodeGen/CGClass.cpp | |
parent | 6e8dfc7a6cbf6dec743aab4df7d55575c6945607 (diff) |
When generating IR for default copy-constructors, copy-assignment operators,
move-constructors and move-assignment operators, use memcpy to copy adjacent
POD members.
Previously, classes with one or more Non-POD members would fall back on
element-wise copies for all members, including POD members. This often
generated a lot of IR. Without padding metadata, it wasn't often possible
for the LLVM optimizers to turn the element-wise copies into a memcpy.
This code hasn't yet received any serious tuning. I didn't see any serious
regressions on a self-hosted clang build, or any of the nightly tests, but
I think it's important to get this out in the wild to get more testing.
Insights, feedback and comments welcome.
Many thanks to David Blaikie, Richard Smith, and especially John McCall for
their help and feedback on this work.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@174919 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/CodeGen/CGClass.cpp')
-rw-r--r-- | lib/CodeGen/CGClass.cpp | 359 |
1 files changed, 358 insertions, 1 deletions
diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp index ce32acd0da..6bee452cb0 100644 --- a/lib/CodeGen/CGClass.cpp +++ b/lib/CodeGen/CGClass.cpp @@ -18,6 +18,7 @@ #include "clang/AST/EvaluatedExprVisitor.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtCXX.h" +#include "clang/Basic/TargetBuiltins.h" #include "clang/Frontend/CodeGenOptions.h" using namespace clang; @@ -742,6 +743,342 @@ void CodeGenFunction::EmitConstructorBody(FunctionArgList &Args) { ExitCXXTryStmt(*cast<CXXTryStmt>(Body), true); } +namespace { + class FieldMemcpyizer { + public: + FieldMemcpyizer(CodeGenFunction &CGF, const CXXRecordDecl *ClassDecl, + const VarDecl *SrcRec) + : CGF(CGF), ClassDecl(ClassDecl), SrcRec(SrcRec), + RecLayout(CGF.getContext().getASTRecordLayout(ClassDecl)), + FirstField(0), LastField(0), FirstFieldOffset(0), LastFieldOffset(0), + LastAddedFieldIndex(0) { } + + static bool isMemcpyableField(FieldDecl *F) { + Qualifiers Qual = F->getType().getQualifiers(); + if (Qual.hasVolatile() || Qual.hasObjCLifetime()) + return false; + return true; + } + + void addMemcpyableField(FieldDecl *F) { + if (FirstField == 0) + addInitialField(F); + else + addNextField(F); + } + + CharUnits getMemcpySize() const { + unsigned LastFieldSize = + CGF.getContext().getTypeInfo(LastField->getType()).first; + uint64_t MemcpySizeBits = + LastFieldOffset + LastFieldSize - FirstFieldOffset + + CGF.getContext().getCharWidth() - 1; + CharUnits MemcpySize = + CGF.getContext().toCharUnitsFromBits(MemcpySizeBits); + return MemcpySize; + } + + void emitMemcpy() { + // Give the subclass a chance to bail out if it feels the memcpy isn't + // worth it (e.g. Hasn't aggregated enough data). + if (FirstField == 0) { + return; + } + + unsigned FirstFieldAlign = + CGF.getContext().getTypeInfo(FirstField->getType()).second; + assert(FirstFieldOffset % FirstFieldAlign == 0 && "Bad field alignment."); + CharUnits Alignment = + CGF.getContext().toCharUnitsFromBits(FirstFieldAlign); + CharUnits MemcpySize = getMemcpySize(); + QualType RecordTy = CGF.getContext().getTypeDeclType(ClassDecl); + llvm::Value *ThisPtr = CGF.LoadCXXThis(); + LValue DestLV = CGF.MakeNaturalAlignAddrLValue(ThisPtr, RecordTy); + LValue Dest = CGF.EmitLValueForFieldInitialization(DestLV, FirstField); + llvm::Value *SrcPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(SrcRec)); + LValue SrcLV = CGF.MakeNaturalAlignAddrLValue(SrcPtr, RecordTy); + LValue Src = CGF.EmitLValueForFieldInitialization(SrcLV, FirstField); + + emitMemcpyIR(Dest.isBitField() ? Dest.getBitFieldAddr() : Dest.getAddress(), + Src.isBitField() ? Src.getBitFieldAddr() : Src.getAddress(), + MemcpySize, Alignment); + reset(); + } + + void reset() { + FirstField = 0; + } + + protected: + CodeGenFunction &CGF; + const CXXRecordDecl *ClassDecl; + + private: + + void emitMemcpyIR(llvm::Value *DestPtr, llvm::Value *SrcPtr, + CharUnits Size, CharUnits Alignment) { + llvm::PointerType *DPT = cast<llvm::PointerType>(DestPtr->getType()); + llvm::Type *DBP = + llvm::Type::getInt8PtrTy(CGF.getLLVMContext(), DPT->getAddressSpace()); + DestPtr = CGF.Builder.CreateBitCast(DestPtr, DBP); + + llvm::PointerType *SPT = cast<llvm::PointerType>(SrcPtr->getType()); + llvm::Type *SBP = + llvm::Type::getInt8PtrTy(CGF.getLLVMContext(), SPT->getAddressSpace()); + SrcPtr = CGF.Builder.CreateBitCast(SrcPtr, SBP); + + CGF.Builder.CreateMemCpy(DestPtr, SrcPtr, Size.getQuantity(), + Alignment.getQuantity()); + } + + void addInitialField(FieldDecl *F) { + FirstField = F; + LastField = F; + FirstFieldOffset = RecLayout.getFieldOffset(F->getFieldIndex()); + LastFieldOffset = FirstFieldOffset; + LastAddedFieldIndex = F->getFieldIndex(); + return; + } + + void addNextField(FieldDecl *F) { + assert(F->getFieldIndex() == LastAddedFieldIndex + 1 && + "Cannot aggregate non-contiguous fields."); + LastAddedFieldIndex = F->getFieldIndex(); + + // The 'first' and 'last' fields are chosen by offset, rather than field + // index. This allows the code to support bitfields, as well as regular + // fields. + uint64_t FOffset = RecLayout.getFieldOffset(F->getFieldIndex()); + if (FOffset < FirstFieldOffset) { + FirstField = F; + FirstFieldOffset = FOffset; + } else if (FOffset > LastFieldOffset) { + LastField = F; + LastFieldOffset = FOffset; + } + } + + const VarDecl *SrcRec; + const ASTRecordLayout &RecLayout; + FieldDecl *FirstField; + FieldDecl *LastField; + uint64_t FirstFieldOffset, LastFieldOffset; + unsigned LastAddedFieldIndex; + }; + + class ConstructorMemcpyizer : public FieldMemcpyizer { + private: + + /// Get source argument for copy constructor. Returns null if not a copy + /// constructor. + static const VarDecl* getTrivialCopySource(const CXXConstructorDecl *CD, + FunctionArgList &Args) { + if (CD->isCopyOrMoveConstructor() && CD->isImplicitlyDefined()) + return Args[Args.size() - 1]; + return 0; + } + + // Returns true if a CXXCtorInitializer represents a member initialization + // that can be rolled into a memcpy. + bool isMemberInitMemcpyable(CXXCtorInitializer *MemberInit) const { + if (!MemcpyableCtor) + return false; + FieldDecl *Field = MemberInit->getMember(); + assert(Field != 0 && "No field for member init."); + QualType FieldType = Field->getType(); + CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(MemberInit->getInit()); + + // Bail out on non-POD, not-trivially-constructable members. + if (!(CE && CE->getConstructor()->isTrivial()) && + !(FieldType.isTriviallyCopyableType(CGF.getContext()) || + FieldType->isReferenceType())) + return false; + + // Bail out on volatile fields. + if (!isMemcpyableField(Field)) + return false; + + // Otherwise we're good. + return true; + } + + public: + ConstructorMemcpyizer(CodeGenFunction &CGF, const CXXConstructorDecl *CD, + FunctionArgList &Args) + : FieldMemcpyizer(CGF, CD->getParent(), getTrivialCopySource(CD, Args)), + ConstructorDecl(CD), + MemcpyableCtor(CD->isImplicitlyDefined() && + CD->isCopyOrMoveConstructor() && + CGF.getLangOpts().getGC() == LangOptions::NonGC), + Args(Args) { } + + void addMemberInitializer(CXXCtorInitializer *MemberInit) { + if (isMemberInitMemcpyable(MemberInit)) { + AggregatedInits.push_back(MemberInit); + addMemcpyableField(MemberInit->getMember()); + } else { + emitAggregatedInits(); + EmitMemberInitializer(CGF, ConstructorDecl->getParent(), MemberInit, + ConstructorDecl, Args); + } + } + + void emitAggregatedInits() { + if (AggregatedInits.size() <= 1) { + // This memcpy is too small to be worthwhile. Fall back on default + // codegen. + for (unsigned i = 0; i < AggregatedInits.size(); ++i) { + EmitMemberInitializer(CGF, ConstructorDecl->getParent(), + AggregatedInits[i], ConstructorDecl, Args); + } + reset(); + return; + } + + pushEHDestructors(); + emitMemcpy(); + AggregatedInits.clear(); + } + + void pushEHDestructors() { + llvm::Value *ThisPtr = CGF.LoadCXXThis(); + QualType RecordTy = CGF.getContext().getTypeDeclType(ClassDecl); + LValue LHS = CGF.MakeNaturalAlignAddrLValue(ThisPtr, RecordTy); + + for (unsigned i = 0; i < AggregatedInits.size(); ++i) { + QualType FieldType = AggregatedInits[i]->getMember()->getType(); + QualType::DestructionKind dtorKind = FieldType.isDestructedType(); + if (CGF.needsEHCleanup(dtorKind)) + CGF.pushEHDestroy(dtorKind, LHS.getAddress(), FieldType); + } + } + + void finish() { + emitAggregatedInits(); + } + + private: + const CXXConstructorDecl *ConstructorDecl; + bool MemcpyableCtor; + FunctionArgList &Args; + SmallVector<CXXCtorInitializer*, 16> AggregatedInits; + }; + + class AssignmentMemcpyizer : public FieldMemcpyizer { + private: + + // Returns the memcpyable field copied by the given statement, if one + // exists. Otherwise r + FieldDecl* getMemcpyableField(Stmt *S) { + if (!AssignmentsMemcpyable) + return 0; + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(S)) { + // Recognise trivial assignments. + if (BO->getOpcode() != BO_Assign) + return 0; + MemberExpr *ME = dyn_cast<MemberExpr>(BO->getLHS()); + if (!ME) + return 0; + FieldDecl *Field = dyn_cast<FieldDecl>(ME->getMemberDecl()); + if (!Field || !isMemcpyableField(Field)) + return 0; + Stmt *RHS = BO->getRHS(); + if (ImplicitCastExpr *EC = dyn_cast<ImplicitCastExpr>(RHS)) + RHS = EC->getSubExpr(); + if (!RHS) + return 0; + MemberExpr *ME2 = dyn_cast<MemberExpr>(RHS); + if (dyn_cast<FieldDecl>(ME2->getMemberDecl()) != Field) + return 0; + return Field; + } else if (CXXMemberCallExpr *MCE = dyn_cast<CXXMemberCallExpr>(S)) { + CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(MCE->getCalleeDecl()); + if (!(MD && (MD->isCopyAssignmentOperator() || + MD->isMoveAssignmentOperator()) && + MD->isTrivial())) + return 0; + MemberExpr *IOA = dyn_cast<MemberExpr>(MCE->getImplicitObjectArgument()); + if (!IOA) + return 0; + FieldDecl *Field = dyn_cast<FieldDecl>(IOA->getMemberDecl()); + if (!Field || !isMemcpyableField(Field)) + return 0; + MemberExpr *Arg0 = dyn_cast<MemberExpr>(MCE->getArg(0)); + if (!Arg0 || Field != dyn_cast<FieldDecl>(Arg0->getMemberDecl())) + return 0; + return Field; + } else if (CallExpr *CE = dyn_cast<CallExpr>(S)) { + FunctionDecl *FD = dyn_cast<FunctionDecl>(CE->getCalleeDecl()); + if (!FD || FD->getBuiltinID() != Builtin::BI__builtin_memcpy) + return 0; + Expr *DstPtr = CE->getArg(0); + if (ImplicitCastExpr *DC = dyn_cast<ImplicitCastExpr>(DstPtr)) + DstPtr = DC->getSubExpr(); + UnaryOperator *DUO = dyn_cast<UnaryOperator>(DstPtr); + if (!DUO || DUO->getOpcode() != UO_AddrOf) + return 0; + MemberExpr *ME = dyn_cast<MemberExpr>(DUO->getSubExpr()); + if (!ME) + return 0; + FieldDecl *Field = dyn_cast<FieldDecl>(ME->getMemberDecl()); + if (!Field || !isMemcpyableField(Field)) + return 0; + Expr *SrcPtr = CE->getArg(1); + if (ImplicitCastExpr *SC = dyn_cast<ImplicitCastExpr>(SrcPtr)) + SrcPtr = SC->getSubExpr(); + UnaryOperator *SUO = dyn_cast<UnaryOperator>(SrcPtr); + if (!SUO || SUO->getOpcode() != UO_AddrOf) + return 0; + MemberExpr *ME2 = dyn_cast<MemberExpr>(SUO->getSubExpr()); + if (!ME2 || Field != dyn_cast<FieldDecl>(ME2->getMemberDecl())) + return 0; + return Field; + } + + return 0; + } + + bool AssignmentsMemcpyable; + SmallVector<Stmt*, 16> AggregatedStmts; + + public: + + AssignmentMemcpyizer(CodeGenFunction &CGF, const CXXMethodDecl *AD, + FunctionArgList &Args) + : FieldMemcpyizer(CGF, AD->getParent(), Args[Args.size() - 1]), + AssignmentsMemcpyable(CGF.getLangOpts().getGC() == LangOptions::NonGC) { + assert(Args.size() == 2); + } + + void emitAssignment(Stmt *S) { + FieldDecl *F = getMemcpyableField(S); + if (F) { + addMemcpyableField(F); + AggregatedStmts.push_back(S); + } else { + emitAggregatedStmts(); + CGF.EmitStmt(S); + } + } + + void emitAggregatedStmts() { + if (AggregatedStmts.size() <= 1) { + for (unsigned i = 0; i < AggregatedStmts.size(); ++i) + CGF.EmitStmt(AggregatedStmts[i]); + reset(); + } + + emitMemcpy(); + AggregatedStmts.clear(); + } + + void finish() { + emitAggregatedStmts(); + } + }; + +} + /// EmitCtorPrologue - This routine generates necessary code to initialize /// base classes and non-static data members belonging to this constructor. void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD, @@ -770,8 +1107,10 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD, InitializeVTablePointers(ClassDecl); + ConstructorMemcpyizer CM(*this, CD, Args); for (unsigned I = 0, E = MemberInitializers.size(); I != E; ++I) - EmitMemberInitializer(*this, ClassDecl, MemberInitializers[I], CD, Args); + CM.addMemberInitializer(MemberInitializers[I]); + CM.finish(); } static bool @@ -940,6 +1279,24 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { ExitCXXTryStmt(*cast<CXXTryStmt>(Body), true); } +void CodeGenFunction::emitImplicitAssignmentOperatorBody(FunctionArgList &Args) { + const CXXMethodDecl *AssignOp = cast<CXXMethodDecl>(CurGD.getDecl()); + const Stmt *RootS = AssignOp->getBody(); + assert(isa<CompoundStmt>(RootS) && + "Body of an implicit assignment operator should be compound stmt."); + const CompoundStmt *RootCS = cast<CompoundStmt>(RootS); + + LexicalScope Scope(*this, RootCS->getSourceRange()); + + AssignmentMemcpyizer AM(*this, AssignOp, Args); + for (CompoundStmt::const_body_iterator I = RootCS->body_begin(), + E = RootCS->body_end(); + I != E; ++I) { + AM.emitAssignment(*I); + } + AM.finish(); +} + namespace { /// Call the operator delete associated with the current destructor. struct CallDtorDelete : EHScopeStack::Cleanup { |