From e07f85eb76a0254d3adbdf8b5d61ff5c07858cef Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Tue, 11 Dec 2012 23:26:14 +0000 Subject: Replace TargetLowering::isIntImmLegal() with ScalarTargetTransformInfo::getIntImmCost() instead. "Legal" is a poorly defined term for something like integer immediate materialization. It is always possible to materialize an integer immediate. Whether to use it for memcpy expansion is more a "cost" conceern. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169929 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'lib/CodeGen/SelectionDAG/SelectionDAG.cpp') diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 29339405aa..4cb63ce9a6 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -41,6 +41,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/TargetTransformInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" @@ -3382,7 +3383,10 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8; } - if (TLI.isIntImmLegal(Val, VT)) + // If the "cost" of materializing the integer immediate is 1 or free, then + // it is cost effective to turn the load into the immediate. + if (DAG.getTarget().getScalarTargetTransformInfo()-> + getIntImmCost(Val, VT.getTypeForEVT(*DAG.getContext())) < 2) return DAG.getConstant(Val, VT); return SDValue(0, 0); } -- cgit v1.2.3-70-g09d2 From 61f4dfe3693bf68b20748d82ac4dd9bf2f356699 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 12 Dec 2012 00:42:09 +0000 Subject: Avoid using lossy load / stores for memcpy / memset expansion. e.g. f64 load / store on non-SSE2 x86 targets. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169944 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 9 ++++++++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 36 ++++++++++++++++++------------- lib/Target/ARM/ARMISelLowering.cpp | 4 ++++ lib/Target/ARM/ARMISelLowering.h | 7 ++++++ lib/Target/X86/X86ISelLowering.cpp | 8 +++++++ lib/Target/X86/X86ISelLowering.h | 7 ++++++ test/CodeGen/X86/memcpy-2.ll | 4 ++-- 7 files changed, 58 insertions(+), 17 deletions(-) (limited to 'lib/CodeGen/SelectionDAG/SelectionDAG.cpp') diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index faf0a0179a..f301043851 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -716,6 +716,15 @@ public: return MVT::Other; } + /// isLegalMemOpType - Returns true if it's legal to use load / store of the + /// specified type to expand memcpy / memset inline. This is mostly true + /// for legal types except for some special cases. For example, on X86 + /// targets without SSE2 f64 load / store are done with fldl / fstpl which + /// also does type conversion. + virtual bool isLegalMemOpType(MVT VT) const { + return VT.isInteger(); + } + /// usesUnderscoreSetJmp - Determine if we should use _setjmp or setjmp /// to implement llvm.setjmp. bool usesUnderscoreSetJmp() const { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 4cb63ce9a6..36592e54f8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3474,27 +3474,33 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, unsigned VTSize = VT.getSizeInBits() / 8; while (VTSize > Size) { // For now, only use non-vector load / store's for the left-over pieces. - EVT NewVT; + EVT NewVT = VT; unsigned NewVTSize; + + bool Found = false; if (VT.isVector() || VT.isFloatingPoint()) { NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32; - while (!TLI.isOperationLegalOrCustom(ISD::STORE, NewVT)) { - if (NewVT == MVT::i64 && - TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64)) { - // i64 is usually not legal on 32-bit targets, but f64 may be. - NewVT = MVT::f64; - break; - } - NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1); + if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) && + TLI.isLegalMemOpType(NewVT.getSimpleVT())) + Found = true; + else if (NewVT == MVT::i64 && + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) && + TLI.isLegalMemOpType(MVT::f64)) { + // i64 is usually not legal on 32-bit targets, but f64 may be. + NewVT = MVT::f64; + Found = true; } - NewVTSize = NewVT.getSizeInBits() / 8; - } else { - // This can result in a type that is not legal on the target, e.g. - // 1 or 2 bytes on PPC. - NewVT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); - NewVTSize = VTSize >> 1; } + if (!Found) { + do { + NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1); + if (NewVT == MVT::i8) + break; + } while (!TLI.isLegalMemOpType(NewVT.getSimpleVT())); + } + NewVTSize = NewVT.getSizeInBits() / 8; + // If the new VT cannot cover all of the remaining bits, then consider // issuing a (or a pair of) unaligned and overlapping load / store. // FIXME: Only does this for 64-bit or more since we don't have proper diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 88282c7331..de7159e474 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -9481,6 +9481,10 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, return MVT::Other; } +bool ARMTargetLowering::isLegalMemOpType(MVT VT) const { + return VT.isInteger() || VT == MVT::f64 || VT == MVT::v2f64; +} + bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { if (Val.getOpcode() != ISD::LOAD) return false; diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 5a44201ec4..3e78ae3b2d 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -296,6 +296,13 @@ namespace llvm { bool MemcpyStrSrc, MachineFunction &MF) const; + /// isLegalMemOpType - Returns true if it's legal to use load / store of the + /// specified type to expand memcpy / memset inline. This is mostly true + /// for legal types except for some special cases. For example, on X86 + /// targets without SSE2 f64 load / store are done with fldl / fstpl which + /// also does type conversion. + virtual bool isLegalMemOpType(MVT VT) const; + using TargetLowering::isZExtFree; virtual bool isZExtFree(SDValue Val, EVT VT2) const; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 90bee41e35..800c2012df 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1412,6 +1412,14 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, return MVT::i32; } +bool X86TargetLowering::isLegalMemOpType(MVT VT) const { + if (VT == MVT::f32) + return X86ScalarSSEf32; + else if (VT == MVT::f64) + return X86ScalarSSEf64; + return VT.isInteger(); +} + bool X86TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { if (Fast) diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index a515be23ef..9d22da1dd9 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -506,6 +506,13 @@ namespace llvm { bool IsZeroVal, bool MemcpyStrSrc, MachineFunction &MF) const; + /// isLegalMemOpType - Returns true if it's legal to use load / store of the + /// specified type to expand memcpy / memset inline. This is mostly true + /// for legal types except for some special cases. For example, on X86 + /// targets without SSE2 f64 load / store are done with fldl / fstpl which + /// also does type conversion. + virtual bool isLegalMemOpType(MVT VT) const; + /// allowsUnalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses. of the specified type. Returns whether it /// is "fast" by reference in the second argument. diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll index dcc8f0d268..949d6a4293 100644 --- a/test/CodeGen/X86/memcpy-2.ll +++ b/test/CodeGen/X86/memcpy-2.ll @@ -17,11 +17,11 @@ entry: ; SSE2: movb $0, 24(%esp) ; SSE1: t1: -; SSE1: fldl _.str+16 -; SSE1: fstpl 16(%esp) ; SSE1: movaps _.str, %xmm0 ; SSE1: movaps %xmm0 ; SSE1: movb $0, 24(%esp) +; SSE1: movl $0, 20(%esp) +; SSE1: movl $0, 16(%esp) ; NOSSE: t1: ; NOSSE: movb $0 -- cgit v1.2.3-70-g09d2 From 7d34267df63e23be1957f738de783c145febb7af Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 12 Dec 2012 01:32:07 +0000 Subject: - Rename isLegalMemOpType to isSafeMemOpType. "Legal" is a very overloade term. Also added more comments to explain why it is generally ok to return true. - Rename getOptimalMemOpType argument IsZeroVal to ZeroOrLdSrc. It's meant to be true for loaded source (memcpy) or zero constants (memset). The poor name choice is probably some kind of legacy issue. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169954 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 15 ++++++++------- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 10 +++++----- lib/Target/ARM/ARMISelLowering.cpp | 8 ++------ lib/Target/ARM/ARMISelLowering.h | 9 +-------- lib/Target/Mips/MipsISelLowering.cpp | 2 +- lib/Target/Mips/MipsISelLowering.h | 2 +- lib/Target/PowerPC/PPCISelLowering.cpp | 4 ++-- lib/Target/PowerPC/PPCISelLowering.h | 4 ++-- lib/Target/X86/X86ISelLowering.cpp | 10 +++++----- lib/Target/X86/X86ISelLowering.h | 13 +++++++------ 10 files changed, 34 insertions(+), 43 deletions(-) (limited to 'lib/CodeGen/SelectionDAG/SelectionDAG.cpp') diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index f301043851..3adf517e4a 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -702,7 +702,7 @@ public: /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, /// probably because the source does not need to be loaded. If - /// 'IsZeroVal' is true, that means it's safe to return a + /// 'ZeroOrLdSrc' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is /// constant so it does not need to be loaded. @@ -710,19 +710,20 @@ public: /// target-independent logic. virtual EVT getOptimalMemOpType(uint64_t /*Size*/, unsigned /*DstAlign*/, unsigned /*SrcAlign*/, - bool /*IsZeroVal*/, + bool /*ZeroOrLdSrc*/, bool /*MemcpyStrSrc*/, MachineFunction &/*MF*/) const { return MVT::Other; } - /// isLegalMemOpType - Returns true if it's legal to use load / store of the + /// isSafeMemOpType - Returns true if it's safe to use load / store of the /// specified type to expand memcpy / memset inline. This is mostly true - /// for legal types except for some special cases. For example, on X86 + /// for all types except for some special cases. For example, on X86 /// targets without SSE2 f64 load / store are done with fldl / fstpl which - /// also does type conversion. - virtual bool isLegalMemOpType(MVT VT) const { - return VT.isInteger(); + /// also does type conversion. Note the specified type doesn't have to be + /// legal as the hook is used before type legalization. + virtual bool isSafeMemOpType(MVT VT) const { + return true; } /// usesUnderscoreSetJmp - Determine if we should use _setjmp or setjmp diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 36592e54f8..754e6f352d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3426,7 +3426,7 @@ static bool isMemSrcFromString(SDValue Src, StringRef &Str) { static bool FindOptimalMemOpLowering(std::vector &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, + bool ZeroOrLdSrc, bool MemcpyStrSrc, bool AllowOverlap, SelectionDAG &DAG, @@ -3441,7 +3441,7 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does // not need to be loaded. EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, - IsZeroVal, MemcpyStrSrc, + ZeroOrLdSrc, MemcpyStrSrc, DAG.getMachineFunction()); if (VT == MVT::Other) { @@ -3481,11 +3481,11 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, if (VT.isVector() || VT.isFloatingPoint()) { NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32; if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) && - TLI.isLegalMemOpType(NewVT.getSimpleVT())) + TLI.isSafeMemOpType(NewVT.getSimpleVT())) Found = true; else if (NewVT == MVT::i64 && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) && - TLI.isLegalMemOpType(MVT::f64)) { + TLI.isSafeMemOpType(MVT::f64)) { // i64 is usually not legal on 32-bit targets, but f64 may be. NewVT = MVT::f64; Found = true; @@ -3497,7 +3497,7 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1); if (NewVT == MVT::i8) break; - } while (!TLI.isLegalMemOpType(NewVT.getSimpleVT())); + } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT())); } NewVTSize = NewVT.getSizeInBits() / 8; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index de7159e474..613df7a9b3 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -9450,13 +9450,13 @@ static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, + bool ZeroOrLdSrc, bool MemcpyStrSrc, MachineFunction &MF) const { const Function *F = MF.getFunction(); // See if we can use NEON instructions for this... - if (IsZeroVal && + if (ZeroOrLdSrc && Subtarget->hasNEON() && !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) { bool Fast; @@ -9481,10 +9481,6 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, return MVT::Other; } -bool ARMTargetLowering::isLegalMemOpType(MVT VT) const { - return VT.isInteger() || VT == MVT::f64 || VT == MVT::v2f64; -} - bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { if (Val.getOpcode() != ISD::LOAD) return false; diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 3e78ae3b2d..59e2fd333f 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -292,17 +292,10 @@ namespace llvm { virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, + bool ZeroOrLdSrc, bool MemcpyStrSrc, MachineFunction &MF) const; - /// isLegalMemOpType - Returns true if it's legal to use load / store of the - /// specified type to expand memcpy / memset inline. This is mostly true - /// for legal types except for some special cases. For example, on X86 - /// targets without SSE2 f64 load / store are done with fldl / fstpl which - /// also does type conversion. - virtual bool isLegalMemOpType(MVT VT) const; - using TargetLowering::isZExtFree; virtual bool isZExtFree(SDValue Val, EVT VT2) const; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 619ae077b3..5edf82bc7f 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -3476,7 +3476,7 @@ MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { } EVT MipsTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, - unsigned SrcAlign, bool IsZeroVal, + unsigned SrcAlign, bool ZeroOrLdSrc, bool MemcpyStrSrc, MachineFunction &MF) const { if (Subtarget->hasMips64()) diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 4b318dc16f..46986613cb 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -362,7 +362,7 @@ namespace llvm { virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, - unsigned SrcAlign, bool IsZeroVal, + unsigned SrcAlign, bool ZeroOrLdSrc, bool MemcpyStrSrc, MachineFunction &MF) const; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 51f2192371..59aaf6db04 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -6815,7 +6815,7 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, /// probably because the source does not need to be loaded. If -/// 'IsZeroVal' is true, that means it's safe to return a +/// 'ZeroOrLdSrc' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is /// constant so it does not need to be loaded. @@ -6823,7 +6823,7 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// target-independent logic. EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, + bool ZeroOrLdSrc, bool MemcpyStrSrc, MachineFunction &MF) const { if (this->PPCSubTarget.isPPC64()) { diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 9f15eb89d5..a90118d3d0 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -401,7 +401,7 @@ namespace llvm { /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, /// probably because the source does not need to be loaded. If - /// 'IsZeroVal' is true, that means it's safe to return a + /// 'ZeroOrLdSrc' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is /// constant so it does not need to be loaded. @@ -409,7 +409,7 @@ namespace llvm { /// target-independent logic. virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, bool MemcpyStrSrc, + bool ZeroOrLdSrc, bool MemcpyStrSrc, MachineFunction &MF) const; /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 800c2012df..f87d1fcb88 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1370,7 +1370,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const { /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, /// probably because the source does not need to be loaded. If -/// 'IsZeroVal' is true, that means it's safe to return a +/// 'ZeroOrLdSrc' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is /// constant so it does not need to be loaded. @@ -1379,11 +1379,11 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const { EVT X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, + bool ZeroOrLdSrc, bool MemcpyStrSrc, MachineFunction &MF) const { const Function *F = MF.getFunction(); - if (IsZeroVal && + if (ZeroOrLdSrc && !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) { if (Size >= 16 && (Subtarget->isUnalignedMemAccessFast() || @@ -1412,12 +1412,12 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, return MVT::i32; } -bool X86TargetLowering::isLegalMemOpType(MVT VT) const { +bool X86TargetLowering::isSafeMemOpType(MVT VT) const { if (VT == MVT::f32) return X86ScalarSSEf32; else if (VT == MVT::f64) return X86ScalarSSEf64; - return VT.isInteger(); + return true; } bool diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 9d22da1dd9..601ed2b120 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -495,7 +495,7 @@ namespace llvm { /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, /// probably because the source does not need to be loaded. If - /// 'IsZeroVal' is true, that means it's safe to return a + /// 'ZeroOrLdSrc' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is /// constant so it does not need to be loaded. @@ -503,15 +503,16 @@ namespace llvm { /// target-independent logic. virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, bool MemcpyStrSrc, + bool ZeroOrLdSrc, bool MemcpyStrSrc, MachineFunction &MF) const; - /// isLegalMemOpType - Returns true if it's legal to use load / store of the + /// isSafeMemOpType - Returns true if it's safe to use load / store of the /// specified type to expand memcpy / memset inline. This is mostly true - /// for legal types except for some special cases. For example, on X86 + /// for all types except for some special cases. For example, on X86 /// targets without SSE2 f64 load / store are done with fldl / fstpl which - /// also does type conversion. - virtual bool isLegalMemOpType(MVT VT) const; + /// also does type conversion. Note the specified type doesn't have to be + /// legal as the hook is used before type legalization. + virtual bool isSafeMemOpType(MVT VT) const; /// allowsUnalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses. of the specified type. Returns whether it -- cgit v1.2.3-70-g09d2 From 946a3a9f22c967d5432eaab5fa464b91343477cd Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 12 Dec 2012 02:34:41 +0000 Subject: Sorry about the churn. One more change to getOptimalMemOpType() hook. Did I mention the inline memcpy / memset expansion code is a mess? This patch split the ZeroOrLdSrc argument into two: IsMemset and ZeroMemset. The first indicates whether it is expanding a memset or a memcpy / memmove. The later is whether the memset is a memset of zero. It's totally possible (likely even) that targets may want to do different things for memcpy and memset of zero. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169959 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 12 ++++++------ lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 13 +++++++------ lib/Target/ARM/ARMISelLowering.cpp | 4 ++-- lib/Target/ARM/ARMISelLowering.h | 2 +- lib/Target/Mips/MipsISelLowering.cpp | 3 ++- lib/Target/Mips/MipsISelLowering.h | 3 ++- lib/Target/PowerPC/PPCISelLowering.cpp | 11 +++++------ lib/Target/PowerPC/PPCISelLowering.h | 13 ++++++------- lib/Target/X86/X86ISelLowering.cpp | 13 ++++++------- lib/Target/X86/X86ISelLowering.h | 13 ++++++------- 10 files changed, 43 insertions(+), 44 deletions(-) (limited to 'lib/CodeGen/SelectionDAG/SelectionDAG.cpp') diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 3adf517e4a..6c88261cf7 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -701,16 +701,16 @@ public: /// lowering. If DstAlign is zero that means it's safe to destination /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, - /// probably because the source does not need to be loaded. If - /// 'ZeroOrLdSrc' is true, that means it's safe to return a - /// non-scalar-integer type, e.g. empty string source, constant, or loaded - /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is - /// constant so it does not need to be loaded. + /// probably because the source does not need to be loaded. If 'IsMemset' is + /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that + /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy + /// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. virtual EVT getOptimalMemOpType(uint64_t /*Size*/, unsigned /*DstAlign*/, unsigned /*SrcAlign*/, - bool /*ZeroOrLdSrc*/, + bool /*IsMemset*/, + bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/, MachineFunction &/*MF*/) const { return MVT::Other; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 754e6f352d..269f22150a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3426,7 +3426,8 @@ static bool isMemSrcFromString(SDValue Src, StringRef &Str) { static bool FindOptimalMemOpLowering(std::vector &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool ZeroOrLdSrc, + bool IsMemset, + bool ZeroMemset, bool MemcpyStrSrc, bool AllowOverlap, SelectionDAG &DAG, @@ -3441,7 +3442,7 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does // not need to be loaded. EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, - ZeroOrLdSrc, MemcpyStrSrc, + IsMemset, ZeroMemset, MemcpyStrSrc, DAG.getMachineFunction()); if (VT == MVT::Other) { @@ -3559,7 +3560,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), (isZeroStr ? 0 : SrcAlign), - true, CopyFromStr, true, DAG, TLI)) + false, false, CopyFromStr, true, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -3660,8 +3661,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); if (!FindOptimalMemOpLowering(MemOps, Limit, Size, - (DstAlignCanChange ? 0 : Align), - SrcAlign, true, false, false, DAG, TLI)) + (DstAlignCanChange ? 0 : Align), SrcAlign, + false, false, false, false, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -3737,7 +3738,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, isa(Src) && cast(Src)->isNullValue(); if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, (DstAlignCanChange ? 0 : Align), 0, - IsZeroVal, false, true, DAG, TLI)) + true, IsZeroVal, false, true, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 613df7a9b3..b0c81caa78 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -9450,13 +9450,13 @@ static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool ZeroOrLdSrc, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { const Function *F = MF.getFunction(); // See if we can use NEON instructions for this... - if (ZeroOrLdSrc && + if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() && !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) { bool Fast; diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 59e2fd333f..4c8bd0ece3 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -292,7 +292,7 @@ namespace llvm { virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool ZeroOrLdSrc, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 5edf82bc7f..65b33c12b8 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -3476,7 +3476,8 @@ MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { } EVT MipsTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, - unsigned SrcAlign, bool ZeroOrLdSrc, + unsigned SrcAlign, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { if (Subtarget->hasMips64()) diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 46986613cb..ff25df94e3 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -362,7 +362,8 @@ namespace llvm { virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, - unsigned SrcAlign, bool ZeroOrLdSrc, + unsigned SrcAlign, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 59aaf6db04..1402718f71 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -6814,16 +6814,15 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// lowering. If DstAlign is zero that means it's safe to destination /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, -/// probably because the source does not need to be loaded. If -/// 'ZeroOrLdSrc' is true, that means it's safe to return a -/// non-scalar-integer type, e.g. empty string source, constant, or loaded -/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is -/// constant so it does not need to be loaded. +/// probably because the source does not need to be loaded. If 'IsMemset' is +/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that +/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy +/// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool ZeroOrLdSrc, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { if (this->PPCSubTarget.isPPC64()) { diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index a90118d3d0..4b44dbcef8 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -400,16 +400,15 @@ namespace llvm { /// lowering. If DstAlign is zero that means it's safe to destination /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, - /// probably because the source does not need to be loaded. If - /// 'ZeroOrLdSrc' is true, that means it's safe to return a - /// non-scalar-integer type, e.g. empty string source, constant, or loaded - /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is - /// constant so it does not need to be loaded. + /// probably because the source does not need to be loaded. If 'IsMemset' is + /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that + /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy + /// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. virtual EVT - getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool ZeroOrLdSrc, bool MemcpyStrSrc, + getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f87d1fcb88..23301b60c2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1369,21 +1369,20 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const { /// lowering. If DstAlign is zero that means it's safe to destination /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, -/// probably because the source does not need to be loaded. If -/// 'ZeroOrLdSrc' is true, that means it's safe to return a -/// non-scalar-integer type, e.g. empty string source, constant, or loaded -/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is -/// constant so it does not need to be loaded. +/// probably because the source does not need to be loaded. If 'IsMemset' is +/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that +/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy +/// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. EVT X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool ZeroOrLdSrc, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { const Function *F = MF.getFunction(); - if (ZeroOrLdSrc && + if ((!IsMemset || ZeroMemset) && !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) { if (Size >= 16 && (Subtarget->isUnalignedMemAccessFast() || diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 601ed2b120..cbe83a7c24 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -494,16 +494,15 @@ namespace llvm { /// lowering. If DstAlign is zero that means it's safe to destination /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, - /// probably because the source does not need to be loaded. If - /// 'ZeroOrLdSrc' is true, that means it's safe to return a - /// non-scalar-integer type, e.g. empty string source, constant, or loaded - /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is - /// constant so it does not need to be loaded. + /// probably because the source does not need to be loaded. If 'IsMemset' is + /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that + /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy + /// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. virtual EVT - getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool ZeroOrLdSrc, bool MemcpyStrSrc, + getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; /// isSafeMemOpType - Returns true if it's safe to use load / store of the -- cgit v1.2.3-70-g09d2 From a16e49d56f6349c12da2b561da00c22e13eda09b Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 12 Dec 2012 20:43:23 +0000 Subject: Fix a logic bug in inline expansion of memcpy / memset with an overlapping load / store pair. It's not legal to use a wider load than the size of the remaining bytes if it's the first pair of load / store. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170018 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 9 +++++---- test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll | 11 +++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) create mode 100644 test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll (limited to 'lib/CodeGen/SelectionDAG/SelectionDAG.cpp') diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 269f22150a..2375182167 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3469,9 +3469,6 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, unsigned NumMemOps = 0; while (Size != 0) { - if (++NumMemOps > Limit) - return false; - unsigned VTSize = VT.getSizeInBits() / 8; while (VTSize > Size) { // For now, only use non-vector load / store's for the left-over pieces. @@ -3507,7 +3504,8 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, // FIXME: Only does this for 64-bit or more since we don't have proper // cost model for unaligned load / store. bool Fast; - if (AllowOverlap && VTSize >= 8 && NewVTSize < Size && + if (NumMemOps && AllowOverlap && + VTSize >= 8 && NewVTSize < Size && TLI.allowsUnalignedMemoryAccesses(VT, &Fast) && Fast) VTSize = Size; else { @@ -3516,6 +3514,9 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, } } + if (++NumMemOps > Limit) + return false; + MemOps.push_back(VT); Size -= VTSize; } diff --git a/test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll b/test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll new file mode 100644 index 0000000000..9d4daee696 --- /dev/null +++ b/test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll @@ -0,0 +1,11 @@ +; RUN: llc -march=mips64el -mcpu=mips64r2 < %s + +@.str = private unnamed_addr constant [7 x i8] c"hello\0A\00", align 1 + +define void @t(i8* %ptr) { +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %ptr, i8* getelementptr inbounds ([7 x i8]* @.str, i64 0, i64 0), i64 7, i32 1, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind -- cgit v1.2.3-70-g09d2