From bade0345d190427a08b2b947bc94f4d8ca5d7717 Mon Sep 17 00:00:00 2001
From: Patrik Hagglund
Date: Tue, 11 Dec 2012 09:57:18 +0000
Subject: Change TargetLowering::findRepresentativeClass to take an MVT,
 instead of EVT.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169845 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 4 ++--
 lib/Target/X86/X86ISelLowering.h   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 90bee41e35..8b0bf34c1a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1472,10 +1472,10 @@ getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
 
 // FIXME: Why this routine is here? Move to RegInfo!
 std::pair<const TargetRegisterClass*, uint8_t>
-X86TargetLowering::findRepresentativeClass(EVT VT) const{
+X86TargetLowering::findRepresentativeClass(MVT VT) const{
   const TargetRegisterClass *RRC = 0;
   uint8_t Cost = 1;
-  switch (VT.getSimpleVT().SimpleTy) {
+  switch (VT.SimpleTy) {
   default:
     return TargetLowering::findRepresentativeClass(VT);
   case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index a515be23ef..15a43db510 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -708,7 +708,7 @@ namespace llvm {
 
   protected:
     std::pair<const TargetRegisterClass*, uint8_t>
-      findRepresentativeClass(EVT VT) const;
+      findRepresentativeClass(MVT VT) const;
 
   private:
     /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
--
cgit v1.2.3-18-g5258

From 2d916231ff503b995bf3b65a338c9bf0d84ee7c7 Mon Sep 17 00:00:00 2001
From: Patrik Hagglund
Date: Tue, 11 Dec 2012 10:09:23 +0000
Subject: Change TargetLowering::RegisterTypeForVT to contain MVTs, instead
 of EVTs.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169848 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86FastISel.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 3e78b4c0a1..0dcdb9c761 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1909,11 +1909,11 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
   ComputeValueVTs(TLI, I->getType(), RetTys);
   for (unsigned i = 0, e = RetTys.size(); i != e; ++i) {
     EVT VT = RetTys[i];
-    EVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
+    MVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
     unsigned NumRegs = TLI.getNumRegisters(I->getParent()->getContext(), VT);
     for (unsigned j = 0; j != NumRegs; ++j) {
       ISD::InputArg MyFlags;
-      MyFlags.VT = RegisterVT.getSimpleVT();
+      MyFlags.VT = RegisterVT;
       MyFlags.Used = !CS.getInstruction()->use_empty();
       if (CS.paramHasAttr(0, Attributes::SExt))
         MyFlags.Flags.setSExt();
--
cgit v1.2.3-18-g5258

From 47fd10f2fc45d280308b77ed4eda16f3c9c88248 Mon Sep 17 00:00:00 2001
From: Patrik Hagglund
Date: Tue, 11 Dec 2012 10:20:51 +0000
Subject: Change TargetLowering::getTypeForExtArgOrReturn to take and return
 MVTs, instead of EVTs. Accordingly, add bitsLT (and similar) to MVT.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169850 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 6 +++---
 lib/Target/X86/X86ISelLowering.h   | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 8b0bf34c1a..a44879e371 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1689,8 +1689,8 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
   return true;
 }
 
-EVT
-X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
+MVT
+X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, MVT VT,
                                             ISD::NodeType ExtendKind) const {
   MVT ReturnMVT;
   // TODO: Is this also valid on 32-bit?
@@ -1699,7 +1699,7 @@ X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, MVT VT,
   else
     ReturnMVT = MVT::i32;
 
-  EVT MinVT = getRegisterType(Context, ReturnMVT);
+  MVT MinVT = getRegisterType(Context, ReturnMVT);
   return VT.bitsLT(MinVT) ? MinVT : VT;
 }
 
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 15a43db510..e3826be690 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -859,8 +859,8 @@ namespace llvm {
 
     virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
 
-    virtual EVT
-    getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
+    virtual MVT
+    getTypeForExtArgOrReturn(LLVMContext &Context, MVT VT,
                              ISD::NodeType ExtendKind) const;
 
     virtual bool
--
cgit v1.2.3-18-g5258

From 34525f9ac098c1c6bc9002886d6da3039a284fd2 Mon Sep 17 00:00:00 2001
From: Patrik Hagglund
Date: Tue, 11 Dec 2012 11:14:33 +0000
Subject: Revert EVT->MVT changes, r169836-169851, due to buildbot failures.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169854 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86FastISel.cpp     |  4 ++--
 lib/Target/X86/X86ISelLowering.cpp | 10 +++++-----
 lib/Target/X86/X86ISelLowering.h   |  6 +++---
 3 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 0dcdb9c761..3e78b4c0a1 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1909,11 +1909,11 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
   ComputeValueVTs(TLI, I->getType(), RetTys);
   for (unsigned i = 0, e = RetTys.size(); i != e; ++i) {
     EVT VT = RetTys[i];
-    MVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
+    EVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
     unsigned NumRegs = TLI.getNumRegisters(I->getParent()->getContext(), VT);
     for (unsigned j = 0; j != NumRegs; ++j) {
       ISD::InputArg MyFlags;
-      MyFlags.VT = RegisterVT;
+      MyFlags.VT = RegisterVT.getSimpleVT();
       MyFlags.Used = !CS.getInstruction()->use_empty();
       if (CS.paramHasAttr(0, Attributes::SExt))
         MyFlags.Flags.setSExt();
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index a44879e371..90bee41e35 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1472,10 +1472,10 @@ getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
 
 // FIXME: Why this routine is here? Move to RegInfo!
 std::pair<const TargetRegisterClass*, uint8_t>
-X86TargetLowering::findRepresentativeClass(MVT VT) const{
+X86TargetLowering::findRepresentativeClass(EVT VT) const{
   const TargetRegisterClass *RRC = 0;
   uint8_t Cost = 1;
-  switch (VT.SimpleTy) {
+  switch (VT.getSimpleVT().SimpleTy) {
   default:
     return TargetLowering::findRepresentativeClass(VT);
   case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
@@ -1689,8 +1689,8 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
   return true;
 }
 
-MVT
-X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, MVT VT,
+EVT
+X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
                                             ISD::NodeType ExtendKind) const {
   MVT ReturnMVT;
   // TODO: Is this also valid on 32-bit?
@@ -1699,7 +1699,7 @@ X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
   else
     ReturnMVT = MVT::i32;
 
-  MVT MinVT = getRegisterType(Context, ReturnMVT);
+  EVT MinVT = getRegisterType(Context, ReturnMVT);
   return VT.bitsLT(MinVT) ? MinVT : VT;
 }
 
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index e3826be690..a515be23ef 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -708,7 +708,7 @@ namespace llvm {
 
   protected:
     std::pair<const TargetRegisterClass*, uint8_t>
-      findRepresentativeClass(MVT VT) const;
+      findRepresentativeClass(EVT VT) const;
 
   private:
     /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
@@ -859,8 +859,8 @@ namespace llvm {
 
     virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
 
-    virtual MVT
-    getTypeForExtArgOrReturn(LLVMContext &Context, MVT VT,
+    virtual EVT
+    getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
                              ISD::NodeType ExtendKind) const;
 
     virtual bool
--
cgit v1.2.3-18-g5258

From 61f4dfe3693bf68b20748d82ac4dd9bf2f356699 Mon Sep 17 00:00:00 2001
From: Evan Cheng
Date: Wed, 12 Dec 2012 00:42:09 +0000
Subject: Avoid using lossy load / stores for memcpy / memset expansion. e.g.
 f64 load / store on non-SSE2 x86 targets.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169944 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 8 ++++++++
 lib/Target/X86/X86ISelLowering.h   | 7 +++++++
 2 files changed, 15 insertions(+)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 90bee41e35..800c2012df 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1412,6 +1412,14 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
   return MVT::i32;
 }
 
+bool X86TargetLowering::isLegalMemOpType(MVT VT) const {
+  if (VT == MVT::f32)
+    return X86ScalarSSEf32;
+  else if (VT == MVT::f64)
+    return X86ScalarSSEf64;
+  return VT.isInteger();
+}
+
 bool
 X86TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
   if (Fast)
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index a515be23ef..9d22da1dd9 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -506,6 +506,13 @@ namespace llvm {
                         bool IsZeroVal, bool MemcpyStrSrc,
                         MachineFunction &MF) const;
 
+    /// isLegalMemOpType - Returns true if it's legal to use load / store of the
+    /// specified type to expand memcpy / memset inline. This is mostly true
+    /// for legal types except for some special cases. For example, on X86
+    /// targets without SSE2 f64 load / store are done with fldl / fstpl which
+    /// also does type conversion.
+    virtual bool isLegalMemOpType(MVT VT) const;
+
     /// allowsUnalignedMemoryAccesses - Returns true if the target allows
     /// unaligned memory accesses. of the specified type. Returns whether it
     /// is "fast" by reference in the second argument.
--
cgit v1.2.3-18-g5258
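[Background for the commit above -- a standalone illustration, not part of the
patch. On x86 without SSE2, an f64 copy goes through the x87 stack via
fldl/fstpl, and x87 loads quiet signaling NaNs, so a byte pattern copied that
way can come back changed. A minimal sketch, assuming a 32-bit x87-only build
such as -m32 -mno-sse; on an SSE2 build the bits round-trip unchanged:]

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      const uint64_t snan = 0x7FF0000000000001ULL; // f64 signaling NaN
      double d;
      std::memcpy(&d, &snan, sizeof d);
      volatile double v = d; // on x87 this is fldl/fstpl, which quiets the NaN
      double w = v;          // read back through the FPU
      uint64_t out;
      std::memcpy(&out, &w, sizeof out);
      std::printf("%016llx -> %016llx\n", (unsigned long long)snan,
                  (unsigned long long)out); // differs when the copy is lossy
    }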
From 7d34267df63e23be1957f738de783c145febb7af Mon Sep 17 00:00:00 2001
From: Evan Cheng
Date: Wed, 12 Dec 2012 01:32:07 +0000
Subject: - Rename isLegalMemOpType to isSafeMemOpType. "Legal" is a very
 overloaded term. Also added more comments to explain why it is generally ok
 to return true.
 - Rename getOptimalMemOpType argument IsZeroVal to ZeroOrLdSrc. It's meant
 to be true for loaded source (memcpy) or zero constants (memset). The poor
 name choice is probably some kind of legacy issue.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169954 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 10 +++++-----
 lib/Target/X86/X86ISelLowering.h   | 13 +++++++------
 2 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 800c2012df..f87d1fcb88 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1370,7 +1370,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
 /// means there isn't a need to check it against alignment requirement,
 /// probably because the source does not need to be loaded. If
-/// 'IsZeroVal' is true, that means it's safe to return a
+/// 'ZeroOrLdSrc' is true, that means it's safe to return a
 /// non-scalar-integer type, e.g. empty string source, constant, or loaded
 /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
 /// constant so it does not need to be loaded.
@@ -1379,11 +1379,11 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
 EVT
 X86TargetLowering::getOptimalMemOpType(uint64_t Size,
                                        unsigned DstAlign, unsigned SrcAlign,
-                                       bool IsZeroVal,
+                                       bool ZeroOrLdSrc,
                                        bool MemcpyStrSrc,
                                        MachineFunction &MF) const {
   const Function *F = MF.getFunction();
-  if (IsZeroVal &&
+  if (ZeroOrLdSrc &&
       !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) {
     if (Size >= 16 &&
         (Subtarget->isUnalignedMemAccessFast() ||
@@ -1412,12 +1412,12 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
   return MVT::i32;
 }
 
-bool X86TargetLowering::isLegalMemOpType(MVT VT) const {
+bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
   if (VT == MVT::f32)
     return X86ScalarSSEf32;
   else if (VT == MVT::f64)
     return X86ScalarSSEf64;
-  return VT.isInteger();
+  return true;
 }
 
 bool
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 9d22da1dd9..601ed2b120 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -495,7 +495,7 @@ namespace llvm {
     /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
     /// means there isn't a need to check it against alignment requirement,
     /// probably because the source does not need to be loaded. If
-    /// 'IsZeroVal' is true, that means it's safe to return a
+    /// 'ZeroOrLdSrc' is true, that means it's safe to return a
     /// non-scalar-integer type, e.g. empty string source, constant, or loaded
     /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
     /// constant so it does not need to be loaded.
@@ -503,15 +503,16 @@ namespace llvm {
     /// target-independent logic.
     virtual EVT
     getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
-                        bool IsZeroVal, bool MemcpyStrSrc,
+                        bool ZeroOrLdSrc, bool MemcpyStrSrc,
                         MachineFunction &MF) const;
 
-    /// isLegalMemOpType - Returns true if it's legal to use load / store of the
+    /// isSafeMemOpType - Returns true if it's safe to use load / store of the
     /// specified type to expand memcpy / memset inline. This is mostly true
-    /// for legal types except for some special cases. For example, on X86
+    /// for all types except for some special cases. For example, on X86
     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
-    /// also does type conversion.
-    virtual bool isLegalMemOpType(MVT VT) const;
+    /// also does type conversion. Note the specified type doesn't have to be
+    /// legal as the hook is used before type legalization.
+    virtual bool isSafeMemOpType(MVT VT) const;
 
     /// allowsUnalignedMemoryAccesses - Returns true if the target allows
     /// unaligned memory accesses. of the specified type. Returns whether it
--
cgit v1.2.3-18-g5258

From 946a3a9f22c967d5432eaab5fa464b91343477cd Mon Sep 17 00:00:00 2001
From: Evan Cheng
Date: Wed, 12 Dec 2012 02:34:41 +0000
Subject: Sorry about the churn. One more change to getOptimalMemOpType()
 hook. Did I mention the inline memcpy / memset expansion code is a mess?
 This patch splits the ZeroOrLdSrc argument into two: IsMemset and
 ZeroMemset. The first indicates whether it is expanding a memset or a
 memcpy / memmove. The latter is whether the memset is a memset of zero.
 It's totally possible (likely even) that targets may want to do different
 things for memcpy and memset of zero.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169959 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 13 ++++++-------
 lib/Target/X86/X86ISelLowering.h   | 13 ++++++-------
 2 files changed, 12 insertions(+), 14 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index f87d1fcb88..23301b60c2 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1369,21 +1369,20 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
 /// lowering. If DstAlign is zero that means it's safe to destination
 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
 /// means there isn't a need to check it against alignment requirement,
-/// probably because the source does not need to be loaded. If
-/// 'ZeroOrLdSrc' is true, that means it's safe to return a
-/// non-scalar-integer type, e.g. empty string source, constant, or loaded
-/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
-/// constant so it does not need to be loaded.
+/// probably because the source does not need to be loaded. If 'IsMemset' is
+/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+/// source is constant so it does not need to be loaded.
 /// It returns EVT::Other if the type should be determined using generic
 /// target-independent logic.
 EVT
 X86TargetLowering::getOptimalMemOpType(uint64_t Size,
                                        unsigned DstAlign, unsigned SrcAlign,
-                                       bool ZeroOrLdSrc,
+                                       bool IsMemset, bool ZeroMemset,
                                        bool MemcpyStrSrc,
                                        MachineFunction &MF) const {
   const Function *F = MF.getFunction();
-  if (ZeroOrLdSrc &&
+  if ((!IsMemset || ZeroMemset) &&
       !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) {
     if (Size >= 16 &&
         (Subtarget->isUnalignedMemAccessFast() ||
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 601ed2b120..cbe83a7c24 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -494,16 +494,15 @@ namespace llvm {
     /// lowering. If DstAlign is zero that means it's safe to destination
     /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
     /// means there isn't a need to check it against alignment requirement,
-    /// probably because the source does not need to be loaded. If
-    /// 'ZeroOrLdSrc' is true, that means it's safe to return a
-    /// non-scalar-integer type, e.g. empty string source, constant, or loaded
-    /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
-    /// constant so it does not need to be loaded.
+    /// probably because the source does not need to be loaded. If 'IsMemset' is
+    /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+    /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+    /// source is constant so it does not need to be loaded.
     /// It returns EVT::Other if the type should be determined using generic
     /// target-independent logic.
     virtual EVT
-    getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
-                        bool ZeroOrLdSrc, bool MemcpyStrSrc,
+    getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+                        bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                         MachineFunction &MF) const;
 
     /// isSafeMemOpType - Returns true if it's safe to use load / store of the
--
cgit v1.2.3-18-g5258
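[A note on the predicate rewrite above -- a sketch, not from the patch. The
old single flag ZeroOrLdSrc was true for memcpy/memmove (data comes from
memory) and for zero memsets; with the split flags that same condition is
(!IsMemset || ZeroMemset), which the new X86 code tests. The equivalence can
be checked exhaustively:]

    #include <cassert>

    // Old query: "is the source loaded, or a zero constant?"
    static bool zeroOrLdSrc(bool isMemset, bool zeroMemset) {
      return !isMemset      // memcpy / memmove: source is loaded
             || zeroMemset; // memset of zero
    }

    int main() {
      // The only case that must be rejected is a non-zero memset.
      assert(zeroOrLdSrc(false, false));
      assert(zeroOrLdSrc(false, true)); // ZeroMemset is meaningless here
      assert(zeroOrLdSrc(true,  true));
      assert(!zeroOrLdSrc(true, false));
    }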
From e1d31008c9950ada1a92d0499acb001a2dd76a84 Mon Sep 17 00:00:00 2001
From: Eli Bendersky
Date: Thu, 13 Dec 2012 00:24:56 +0000
Subject: Fix a bogus comment

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170052 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 467edadc7e..5f2c75ed55 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -279,9 +279,9 @@ void X86AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
   Res.setOpcode(RelaxedOp);
 }
 
-/// writeNopData - Write optimal nops to the output file for the \p Count
-/// bytes. This returns the number of bytes written. It may return 0 if
-/// the \p Count is more than the maximum optimal nops.
+/// \brief Write a sequence of optimal nops to the output, covering \p Count
+/// bytes.
+/// \return - true on success, false on failure
 bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
   static const uint8_t Nops[10][10] = {
     // nop
--
cgit v1.2.3-18-g5258

From 0a1e914f8f276cd854e23f7c4d21620f2dbe533c Mon Sep 17 00:00:00 2001
From: Nadav Rotem
Date: Fri, 14 Dec 2012 21:20:37 +0000
Subject: TypeLegalizer: Do not generate target specific nodes with illegal
 types, because we can't type-legalize them.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170245 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 23301b60c2..d4ee985ca6 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -11742,6 +11742,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
   DebugLoc dl = N->getDebugLoc();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   switch (N->getOpcode()) {
   default:
     llvm_unreachable("Do not know how to custom type legalize this operation!");
@@ -11791,6 +11792,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     return;
   }
   case ISD::FP_ROUND: {
+    if (!TLI.isTypeLegal(N->getOperand(0).getValueType()))
+      return;
     SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32,
                             N->getOperand(0));
     Results.push_back(V);
     return;
--
cgit v1.2.3-18-g5258

From 388fc6a988b56a50efff57893a4df14b4d04e1cd Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Sat, 15 Dec 2012 16:47:44 +0000
Subject: X86: Add a couple of target-specific dag combines that turn VSELECTS
 into psubus if possible. We match the pattern "x >= y ? x-y : 0" into
 "subus x, y" and two special cases if y is a constant. DAGCombiner
 canonicalizes those so we first have to undo the canonicalization for those
 cases. The pattern occurs in gzip when the loop vectorizer is enabled. Part
 of PR14613.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170273 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp      | 68 +++++++++++++++++++++++++++++++++
 lib/Target/X86/X86ISelLowering.h        |  3 ++
 lib/Target/X86/X86InstrFragmentsSIMD.td |  1 +
 lib/Target/X86/X86InstrSSE.td           | 34 ++++++++---------
 4 files changed, 88 insertions(+), 18 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index d4ee985ca6..70089fcc65 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -10097,6 +10097,14 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
     return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2));
 
+  // SSE2/AVX2 sub with unsigned saturation intrinsics
+  case Intrinsic::x86_sse2_psubus_b:
+  case Intrinsic::x86_sse2_psubus_w:
+  case Intrinsic::x86_avx2_psubus_b:
+  case Intrinsic::x86_avx2_psubus_w:
+    return DAG.getNode(X86ISD::SUBUS, dl, Op.getValueType(),
+                       Op.getOperand(1), Op.getOperand(2));
+
   // SSE3/AVX horizontal add/sub intrinsics
   case Intrinsic::x86_sse3_hadd_ps:
   case Intrinsic::x86_sse3_hadd_pd:
@@ -11961,6 +11969,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::PSIGN:              return "X86ISD::PSIGN";
   case X86ISD::BLENDV:             return "X86ISD::BLENDV";
   case X86ISD::BLENDI:             return "X86ISD::BLENDI";
+  case X86ISD::SUBUS:              return "X86ISD::SUBUS";
   case X86ISD::HADD:               return "X86ISD::HADD";
   case X86ISD::HSUB:               return "X86ISD::HSUB";
   case X86ISD::FHADD:              return "X86ISD::FHADD";
@@ -14913,6 +14922,65 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
     }
   }
 
+  // Match VSELECTs into subs with unsigned saturation.
+  if (!DCI.isBeforeLegalize() &&
+      N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
+      // psubus is available in SSE2 and AVX2 for i8 and i16 vectors.
+      ((Subtarget->hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
+       (Subtarget->hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
+    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+    // Check if one of the arms of the VSELECT is a zero vector. If it's on the
+    // left side invert the predicate to simplify logic below.
+    SDValue Other;
+    if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
+      Other = RHS;
+      CC = ISD::getSetCCInverse(CC, true);
+    } else if (ISD::isBuildVectorAllZeros(RHS.getNode())) {
+      Other = LHS;
+    }
+
+    if (Other.getNode() && Other->getNumOperands() == 2 &&
+        DAG.isEqualTo(Other->getOperand(0), Cond.getOperand(0))) {
+      SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1);
+      SDValue CondRHS = Cond->getOperand(1);
+
+      // Look for a general sub with unsigned saturation first.
+      // x >= y ? x-y : 0 --> subus x, y
+      // x >  y ? x-y : 0 --> subus x, y
+      if ((CC == ISD::SETUGE || CC == ISD::SETUGT) &&
+          Other->getOpcode() == ISD::SUB && DAG.isEqualTo(OpRHS, CondRHS))
+        return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
+
+      // If the RHS is a constant we have to reverse the const canonicalization.
+      // x > C-1 ? x+-C : 0 --> subus x, C
+      if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
+          isSplatVector(CondRHS.getNode()) && isSplatVector(OpRHS.getNode())) {
+        APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue();
+        if (CondRHS.getConstantOperandVal(0) == -A-1) {
+          SmallVector<SDValue, 32> V(VT.getVectorNumElements(),
+                                     DAG.getConstant(-A, VT.getScalarType()));
+          return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS,
+                             DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
+                                         V.data(), V.size()));
+        }
+      }
+
+      // Another special case: If C was a sign bit, the sub has been
+      // canonicalized into a xor.
+      // FIXME: Would it be better to use ComputeMaskedBits to determine whether
+      //        it's safe to decanonicalize the xor?
+      // x s< 0 ? x^C : 0 --> subus x, C
+      if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR &&
+          ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
+          isSplatVector(OpRHS.getNode())) {
+        APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue();
+        if (A.isSignBit())
+          return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
+      }
+    }
+  }
+
   // If we know that this node is legal then we know that it is going to be
   // matched by one of the SSE/AVX BLEND instructions. These instructions only
   // depend on the highest bit in each word. Try to use SimplifyDemandedBits
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index cbe83a7c24..60f34099dd 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -182,6 +182,9 @@ namespace llvm {
       /// BLENDI - Blend where the selector is an immediate.
       BLENDI,
 
+      // SUBUS - Integer sub with unsigned saturation.
+      SUBUS,
+
       /// HADD - Integer horizontal add.
       HADD,
 
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 09ab995166..7d16d2741d 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -128,6 +128,7 @@ def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>;
 def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
                                           SDTCisVec<1>,
                                           SDTCisSameAs<2, 1>]>;
+def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
 def X86ptest   : SDNode<"X86ISD::PTEST",  SDTX86CmpPTest>;
 def X86testp   : SDNode<"X86ISD::TESTP",  SDTX86CmpPTest>;
 
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 1912a936ce..54032fe97f 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3724,6 +3724,12 @@ defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, VR128, memopv2i64,
                            i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
 defm VPSUBQ : PDI_binop_rm<0xFB, "vpsubq", sub, v2i64, VR128, memopv2i64,
                            i128mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBUSB : PDI_binop_rm<0xD8, "vpsubusb", X86subus, v16i8, VR128,
                             memopv2i64, i128mem, SSE_INTALU_ITINS_P, 0, 0>,
+                             VEX_4V;
+defm VPSUBUSW : PDI_binop_rm<0xD9, "vpsubusw", X86subus, v8i16, VR128,
+                             memopv2i64, i128mem, SSE_INTALU_ITINS_P, 0, 0>,
+                             VEX_4V;
 defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
                               memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
                               VEX_4V;
@@ -3735,12 +3741,6 @@ defm VPSUBSB  : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b,
 defm VPSUBSW  : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w,
                                  VR128, memopv2i64, i128mem,
                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b,
-                                 VR128, memopv2i64, i128mem,
-                                 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w,
-                                 VR128, memopv2i64, i128mem,
-                                 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
 defm VPADDSB  : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b,
                                  VR128, memopv2i64, i128mem,
                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
@@ -3804,6 +3804,12 @@ defm VPSUBDY : PDI_binop_rm<0xFA, "vpsubd", sub, v8i32, VR256, memopv4i64,
                             i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
 defm VPSUBQY : PDI_binop_rm<0xFB, "vpsubq", sub, v4i64, VR256, memopv4i64,
                             i256mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V, VEX_L;
+defm VPSUBUSBY : PDI_binop_rm<0xD8, "vpsubusb", X86subus, v32i8, VR256,
+                              memopv4i64, i256mem, SSE_INTALU_ITINS_P, 0, 0>,
+                              VEX_4V, VEX_L;
+defm VPSUBUSWY : PDI_binop_rm<0xD9, "vpsubusw", X86subus, v16i16, VR256,
+                              memopv4i64, i256mem, SSE_INTALU_ITINS_P, 0, 0>,
+                              VEX_4V, VEX_L;
 defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
                                VR256, memopv4i64, i256mem,
                                SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
@@ -3815,12 +3821,6 @@ defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b,
 defm VPSUBSWY : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_avx2_psubs_w,
                                  VR256, memopv4i64, i256mem,
                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBUSBY : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_avx2_psubus_b,
-                                  VR256, memopv4i64, i256mem,
-                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBUSWY : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_avx2_psubus_w,
-                                  VR256, memopv4i64, i256mem,
-                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
 defm VPADDSBY : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_avx2_padds_b,
                                  VR256, memopv4i64, i256mem,
                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
@@ -3884,6 +3884,10 @@ defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32, VR128, memopv2i64,
                           i128mem, SSE_INTALU_ITINS_P>;
 defm PSUBQ : PDI_binop_rm<0xFB, "psubq", sub, v2i64, VR128, memopv2i64,
                           i128mem, SSE_INTALUQ_ITINS_P>;
+defm PSUBUSB : PDI_binop_rm<0xD8, "psubusb", X86subus, v16i8, VR128, memopv2i64,
+                            i128mem, SSE_INTALU_ITINS_P>;
+defm PSUBUSW : PDI_binop_rm<0xD9, "psubusw", X86subus, v8i16, VR128, memopv2i64,
+                            i128mem, SSE_INTALU_ITINS_P>;
 defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
                              memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>;
@@ -3894,12 +3898,6 @@ defm PSUBSB  : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b,
 defm PSUBSW  : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w,
                                 VR128, memopv2i64, i128mem,
                                 SSE_INTALU_ITINS_P>;
-defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b,
-                                VR128, memopv2i64, i128mem,
-                                SSE_INTALU_ITINS_P>;
-defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w,
-                                VR128, memopv2i64, i128mem,
-                                SSE_INTALU_ITINS_P>;
 defm PADDSB  : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b,
                                 VR128, memopv2i64, i128mem,
                                 SSE_INTALU_ITINS_P, 1>;
--
cgit v1.2.3-18-g5258
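[The scalar shape of the pattern the combine above recognizes -- illustrative
source, not from the patch. With the loop vectorizer enabled, the select in
this loop becomes the VSELECT that the new code rewrites to X86ISD::SUBUS,
i.e. a single psubusw/vpsubusw per vector:]

    #include <cstddef>

    // a[i] = a[i] >= b[i] ? a[i] - b[i] : 0  -- unsigned saturating subtract
    void sat_sub(unsigned short *a, const unsigned short *b, size_t n) {
      for (size_t i = 0; i < n; ++i)
        a[i] = a[i] >= b[i] ? (unsigned short)(a[i] - b[i]) : 0;
    }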
From 16a1acc3b96bb85e53c184f4fd4fd614543cec6b Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 17 Dec 2012 04:55:07 +0000
Subject: Remove store forms of DEC/INC from isDefConvertible. Since they are
 stores they don't have a register def.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170303 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrInfo.cpp | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 4c61b32cac..b71eeca36d 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -3152,19 +3152,15 @@ inline static bool isDefConvertible(MachineInstr *MI) {
   case X86::SUB8ri:  case X86::SUB64rr: case X86::SUB32rr:
   case X86::SUB16rr: case X86::SUB8rr:  case X86::SUB64rm:
   case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm:
-  case X86::DEC64r:  case X86::DEC32r:  case X86::DEC16r: case X86::DEC8r:
-  case X86::DEC64m:  case X86::DEC32m:  case X86::DEC16m: case X86::DEC8m:
+  case X86::DEC64r:  case X86::DEC32r:  case X86::DEC16r: case X86::DEC8r:
   case X86::DEC64_32r: case X86::DEC64_16r:
-  case X86::DEC64_32m: case X86::DEC64_16m:
   case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri:
   case X86::ADD32ri8:  case X86::ADD16ri:  case X86::ADD16ri8:
   case X86::ADD8ri:    case X86::ADD64rr:  case X86::ADD32rr:
   case X86::ADD16rr:   case X86::ADD8rr:   case X86::ADD64rm:
   case X86::ADD32rm:   case X86::ADD16rm:  case X86::ADD8rm:
-  case X86::INC64r:  case X86::INC32r:  case X86::INC16r: case X86::INC8r:
-  case X86::INC64m:  case X86::INC32m:  case X86::INC16m: case X86::INC8m:
+  case X86::INC64r:  case X86::INC32r:  case X86::INC16r: case X86::INC8r:
   case X86::INC64_32r: case X86::INC64_16r:
-  case X86::INC64_32m: case X86::INC64_16m:
   case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri:
   case X86::AND32ri8:  case X86::AND16ri:  case X86::AND16ri8:
   case X86::AND8ri:    case X86::AND64rr:  case X86::AND32rr:
--
cgit v1.2.3-18-g5258

From b72ae7003629771bdb892d9a03cb761b4dbac5be Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 17 Dec 2012 05:02:29 +0000
Subject: Add rest of BMI/BMI2 instructions to the folding tables as well as
 popcnt and lzcnt.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170304 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrInfo.cpp | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index b71eeca36d..962977f652 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -562,7 +562,23 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
     { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
 
-    // BMI/BMI2 foldable instructions
+    // BMI/BMI2/LZCNT/POPCNT foldable instructions
+    { X86::BEXTR32rr,   X86::BEXTR32rm,   0 },
+    { X86::BEXTR64rr,   X86::BEXTR64rm,   0 },
+    { X86::BLSI32rr,    X86::BLSI32rm,    0 },
+    { X86::BLSI64rr,    X86::BLSI64rm,    0 },
+    { X86::BLSMSK32rr,  X86::BLSMSK32rm,  0 },
+    { X86::BLSMSK64rr,  X86::BLSMSK64rm,  0 },
+    { X86::BLSR32rr,    X86::BLSR32rm,    0 },
+    { X86::BLSR64rr,    X86::BLSR64rm,    0 },
+    { X86::BZHI32rr,    X86::BZHI32rm,    0 },
+    { X86::BZHI64rr,    X86::BZHI64rm,    0 },
+    { X86::LZCNT16rr,   X86::LZCNT16rm,   0 },
+    { X86::LZCNT32rr,   X86::LZCNT32rm,   0 },
+    { X86::LZCNT64rr,   X86::LZCNT64rm,   0 },
+    { X86::POPCNT16rr,  X86::POPCNT16rm,  0 },
+    { X86::POPCNT32rr,  X86::POPCNT32rm,  0 },
+    { X86::POPCNT64rr,  X86::POPCNT64rm,  0 },
     { X86::RORX32ri,    X86::RORX32mi,    0 },
     { X86::RORX64ri,    X86::RORX64mi,    0 },
     { X86::SARX32rr,    X86::SARX32rm,    0 },
    { X86::SARX64rr,    X86::SARX64rm,    0 },
     { X86::SHRX32rr,    X86::SHRX32rm,    0 },
     { X86::SHRX64rr,    X86::SHRX64rm,    0 },
     { X86::SHLX32rr,    X86::SHLX32rm,    0 },
     { X86::SHLX64rr,    X86::SHLX64rm,    0 },
+    { X86::TZCNT16rr,   X86::TZCNT16rm,   0 },
+    { X86::TZCNT32rr,   X86::TZCNT32rm,   0 },
+    { X86::TZCNT64rr,   X86::TZCNT64rm,   0 },
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
@@ -1156,8 +1175,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_32 },
 
     // BMI/BMI2 foldable instructions
+    { X86::ANDN32rr,    X86::ANDN32rm,    0 },
+    { X86::ANDN64rr,    X86::ANDN64rm,    0 },
     { X86::MULX32rr,    X86::MULX32rm,    0 },
     { X86::MULX64rr,    X86::MULX64rm,    0 },
+    { X86::PDEP32rr,    X86::PDEP32rm,    0 },
+    { X86::PDEP64rr,    X86::PDEP64rm,    0 },
+    { X86::PEXT32rr,    X86::PEXT32rm,    0 },
+    { X86::PEXT64rr,    X86::PEXT64rm,    0 },
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
--
cgit v1.2.3-18-g5258
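[What a folding-table entry buys, for reference -- a sketch, not from the
patch. Each { rr, rm } pair lets the fold-memory-operand machinery rewrite
the register form into the memory form when an operand comes from a foldable
load or spill slot. With the new POPCNT entries, source like the following
can select one load-folded instruction (popcntl (%rdi), %eax) instead of a
separate load plus popcnt. Function name is illustrative; build with -mpopcnt
or equivalent:]

    // GCC/Clang builtin; lowers to a single popcnt once folded.
    unsigned popcnt_mem(const unsigned *p) {
      return __builtin_popcount(*p);
    }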
From b926afcc5b99030fecf496d15cffdd1315fd0ead Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 17 Dec 2012 05:12:30 +0000
Subject: Simplify BMI ANDN matching to use patterns instead of a DAG combine.
 Also add ANDN to isDefConvertible.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170305 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp   | 10 +---------
 lib/Target/X86/X86ISelLowering.h     |  2 --
 lib/Target/X86/X86InstrArithmetic.td | 15 +++++++++++++--
 lib/Target/X86/X86InstrInfo.cpp      |  2 ++
 4 files changed, 16 insertions(+), 13 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 70089fcc65..34ca24f6d1 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -12026,7 +12026,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::OR:                 return "X86ISD::OR";
   case X86ISD::XOR:                return "X86ISD::XOR";
   case X86ISD::AND:                return "X86ISD::AND";
-  case X86ISD::ANDN:               return "X86ISD::ANDN";
   case X86ISD::BLSI:               return "X86ISD::BLSI";
   case X86ISD::BLSMSK:             return "X86ISD::BLSMSK";
   case X86ISD::BLSR:               return "X86ISD::BLSR";
@@ -15632,7 +15631,7 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
 
   EVT VT = N->getValueType(0);
 
-  // Create ANDN, BLSI, and BLSR instructions
+  // Create BLSI, and BLSR instructions
   // BLSI is X & (-X)
   // BLSR is X & (X-1)
   if (Subtarget->hasBMI() && (VT == MVT::i32 || VT == MVT::i64)) {
@@ -15640,13 +15639,6 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
     SDValue N1 = N->getOperand(1);
     DebugLoc DL = N->getDebugLoc();
 
-    // Check LHS for not
-    if (N0.getOpcode() == ISD::XOR && isAllOnes(N0.getOperand(1)))
-      return DAG.getNode(X86ISD::ANDN, DL, VT, N0.getOperand(0), N1);
-    // Check RHS for not
-    if (N1.getOpcode() == ISD::XOR && isAllOnes(N1.getOperand(1)))
-      return DAG.getNode(X86ISD::ANDN, DL, VT, N1.getOperand(0), N0);
-
     // Check LHS for neg
     if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 &&
         isZero(N0.getOperand(0)))
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 60f34099dd..c51460bc42 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -273,8 +273,6 @@ namespace llvm {
       ADD, SUB, ADC, SBB, SMUL,
       INC, DEC, OR, XOR, AND,
 
-      ANDN, // ANDN - Bitwise AND NOT with FLAGS results.
-
       BLSI,   // BLSI - Extract lowest set isolated bit
      BLSMSK, // BLSMSK - Get mask up to lowest set bit
      BLSR,   // BLSR - Reset lowest set bit
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index f790611b8f..478c42d657 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -1204,12 +1204,12 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC,
                     X86MemOperand x86memop, PatFrag ld_frag> {
   def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
             !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             [(set RC:$dst, EFLAGS, (X86andn_flag RC:$src1, RC:$src2))],
+             [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))],
             IIC_BIN_NONMEM>;
   def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
             !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, EFLAGS,
-              (X86andn_flag RC:$src1, (ld_frag addr:$src2)))], IIC_BIN_MEM>;
+              (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>;
 }
 
 let Predicates = [HasBMI], Defs = [EFLAGS] in {
@@ -1217,6 +1217,17 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in {
   defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64>, T8, VEX_4V, VEX_W;
 }
 
+let Predicates = [HasBMI] in {
+  def : Pat<(and (not GR32:$src1), GR32:$src2),
+            (ANDN32rr GR32:$src1, GR32:$src2)>;
+  def : Pat<(and (not GR64:$src1), GR64:$src2),
+            (ANDN64rr GR64:$src1, GR64:$src2)>;
+  def : Pat<(and (not GR32:$src1), (loadi32 addr:$src2)),
+            (ANDN32rm GR32:$src1, addr:$src2)>;
+  def : Pat<(and (not GR64:$src1), (loadi64 addr:$src2)),
+            (ANDN64rm GR64:$src1, addr:$src2)>;
+}
+
 //===----------------------------------------------------------------------===//
 // MULX Instruction
 //
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 962977f652..d02e12fc4f 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -3201,6 +3201,8 @@ inline static bool isDefConvertible(MachineInstr *MI) {
   case X86::OR8ri:  case X86::OR64rr: case X86::OR32rr:
   case X86::OR16rr: case X86::OR8rr:  case X86::OR64rm:
   case X86::OR32rm: case X86::OR16rm: case X86::OR8rm:
+  case X86::ANDN32rr:  case X86::ANDN32rm:
+  case X86::ANDN64rr:  case X86::ANDN64rm:
     return true;
   }
 }
--
cgit v1.2.3-18-g5258
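[The source-level idiom the new TableGen patterns above catch -- illustrative,
not from the patch. With BMI enabled (-mbmi or equivalent), instruction
selection now matches the generic (and (not x), y) DAG directly to
ANDN32rr/ANDN64rr rather than relying on a custom DAG combine:]

    // Compiles to a single andn instruction when BMI is available.
    unsigned andnot(unsigned a, unsigned b) {
      return ~a & b; // dst = ~a & b
    }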
From a521e68210af701d336281c14ce8031e5fefc516 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 17 Dec 2012 06:13:48 +0000
Subject: Remove EFLAGS from the BLSI/BLSMSK/BLSR patterns. The nodes created
 by DAG combine don't contain an EFLAGS def.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170308 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrInfo.td | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index cdf1c8935f..cf7acfb81d 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -247,9 +247,9 @@ def X86and_flag  : SDNode<"X86ISD::AND",  SDTBinaryArithWithFlags,
                           [SDNPCommutative]>;
 def X86andn_flag : SDNode<"X86ISD::ANDN", SDTBinaryArithWithFlags>;
 
-def X86blsi_flag   : SDNode<"X86ISD::BLSI",   SDTUnaryArithWithFlags>;
-def X86blsmsk_flag : SDNode<"X86ISD::BLSMSK", SDTUnaryArithWithFlags>;
-def X86blsr_flag   : SDNode<"X86ISD::BLSR",   SDTUnaryArithWithFlags>;
+def X86blsi   : SDNode<"X86ISD::BLSI",   SDTIntUnaryOp>;
+def X86blsmsk : SDNode<"X86ISD::BLSMSK", SDTIntUnaryOp>;
+def X86blsr   : SDNode<"X86ISD::BLSR",   SDTIntUnaryOp>;
 
 def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
 
@@ -1605,26 +1605,26 @@ multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM,
                    RegisterClass RC, X86MemOperand x86memop,
                    SDNode OpNode, PatFrag ld_frag> {
   def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
             !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"),
-             [(set RC:$dst, EFLAGS, (OpNode RC:$src))]>, T8, VEX_4V;
+             [(set RC:$dst, (OpNode RC:$src)), (implicit EFLAGS)]>, T8, VEX_4V;
   def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
             !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"),
-             [(set RC:$dst, EFLAGS, (OpNode (ld_frag addr:$src)))]>,
+             [(set RC:$dst, (OpNode (ld_frag addr:$src))), (implicit EFLAGS)]>,
             T8, VEX_4V;
 }
 
 let Predicates = [HasBMI], Defs = [EFLAGS] in {
   defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem,
-                        X86blsr_flag, loadi32>;
+                        X86blsr, loadi32>;
   defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem,
-                        X86blsr_flag, loadi64>, VEX_W;
+                        X86blsr, loadi64>, VEX_W;
   defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem,
-                          X86blsmsk_flag, loadi32>;
+                          X86blsmsk, loadi32>;
   defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem,
-                          X86blsmsk_flag, loadi64>, VEX_W;
+                          X86blsmsk, loadi64>, VEX_W;
   defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem,
-                        X86blsi_flag, loadi32>;
+                        X86blsi, loadi32>;
   defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem,
-                        X86blsi_flag, loadi64>, VEX_W;
+                        X86blsi, loadi64>, VEX_W;
 }
 
 multiclass bmi_bextr_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
--
cgit v1.2.3-18-g5258

From 270bfbd3d1fb42000b23e5747ac7957b0e9fcab8 Mon Sep 17 00:00:00 2001
From: Jakub Staszak
Date: Tue, 18 Dec 2012 22:57:56 +0000
Subject: Reverse order of checking SSE level when calculating compare cost,
 so we check AVX2 before AVX.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170464 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 34ca24f6d1..42d62b2a72 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -17866,10 +17866,10 @@ unsigned X86VectorTargetTransformInfo::getCmpSelInstrCost(unsigned Opcode,
     { ISD::SETCC, MVT::v32i8, 1 },
   };
 
-  if (ST.hasSSE42()) {
-    int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy);
+  if (ST.hasAVX2()) {
+    int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy);
     if (Idx != -1)
-      return LT.first * SSE42CostTbl[Idx].Cost;
+      return LT.first * AVX2CostTbl[Idx].Cost;
   }
 
   if (ST.hasAVX()) {
@@ -17878,10 +17878,10 @@ unsigned X86VectorTargetTransformInfo::getCmpSelInstrCost(unsigned Opcode,
       return LT.first * AVX1CostTbl[Idx].Cost;
   }
 
-  if (ST.hasAVX2()) {
-    int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy);
+  if (ST.hasSSE42()) {
+    int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy);
     if (Idx != -1)
-      return LT.first * AVX2CostTbl[Idx].Cost;
+      return LT.first * SSE42CostTbl[Idx].Cost;
   }
 
   return VectorTargetTransformImpl::getCmpSelInstrCost(Opcode, ValTy, CondTy);
--
cgit v1.2.3-18-g5258
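[Why the check order above matters, spelled out -- a sketch, not from the
patch. x86 SIMD feature levels are cumulative: a subtarget with AVX2 also
reports hasAVX() and hasSSE42(), so dispatch must test the most specific
level first or the broader table shadows the better entries:]

    #include <cstdio>

    struct Sub { bool sse42, avx, avx2; }; // avx2 implies the other two

    static const char *costTable(const Sub &st) {
      if (st.avx2)  return "AVX2 table";   // most specific first
      if (st.avx)   return "AVX table";
      if (st.sse42) return "SSE4.2 table";
      return "generic";
    }

    int main() {
      Sub avx2chip{true, true, true};
      std::printf("%s\n", costTable(avx2chip)); // AVX2 table, not SSE4.2
    }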
From 034b94b17006f51722886b0f2283fb6fb19aca1f Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Wed, 19 Dec 2012 07:18:57 +0000
Subject: Rename the 'Attributes' class to 'Attribute'. It's going to
 represent a single attribute in the future.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170502 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86FastISel.cpp      | 20 ++++++++++----------
 lib/Target/X86/X86FrameLowering.cpp |  2 +-
 lib/Target/X86/X86ISelDAGToDAG.cpp  |  2 +-
 lib/Target/X86/X86ISelLowering.cpp  | 14 +++++++-------
 lib/Target/X86/X86InstrInfo.cpp     |  4 ++--
 lib/Target/X86/X86RegisterInfo.cpp  |  2 +-
 6 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 3e78b4c0a1..32baf4bb76 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1545,9 +1545,9 @@ static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget,
   CallingConv::ID CC = CS.getCallingConv();
   if (CC == CallingConv::Fast || CC == CallingConv::GHC)
     return 0;
-  if (!CS.paramHasAttr(1, Attributes::StructRet))
+  if (!CS.paramHasAttr(1, Attribute::StructRet))
     return 0;
-  if (CS.paramHasAttr(1, Attributes::InReg))
+  if (CS.paramHasAttr(1, Attribute::InReg))
     return 0;
   return 4;
 }
@@ -1626,12 +1626,12 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
     Value *ArgVal = *i;
     ISD::ArgFlagsTy Flags;
     unsigned AttrInd = i - CS.arg_begin() + 1;
-    if (CS.paramHasAttr(AttrInd, Attributes::SExt))
+    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
       Flags.setSExt();
-    if (CS.paramHasAttr(AttrInd, Attributes::ZExt))
+    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
       Flags.setZExt();
 
-    if (CS.paramHasAttr(AttrInd, Attributes::ByVal)) {
+    if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) {
       PointerType *Ty = cast<PointerType>(ArgVal->getType());
       Type *ElementTy = Ty->getElementType();
      unsigned FrameSize = TD.getTypeAllocSize(ElementTy);
@@ -1645,9 +1645,9 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
       return false;
     }
 
-    if (CS.paramHasAttr(AttrInd, Attributes::InReg))
+    if (CS.paramHasAttr(AttrInd, Attribute::InReg))
       Flags.setInReg();
-    if (CS.paramHasAttr(AttrInd, Attributes::Nest))
+    if (CS.paramHasAttr(AttrInd, Attribute::Nest))
       Flags.setNest();
 
     // If this is an i1/i8/i16 argument, promote to i32 to avoid an extra
@@ -1915,11 +1915,11 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
       ISD::InputArg MyFlags;
       MyFlags.VT = RegisterVT.getSimpleVT();
       MyFlags.Used = !CS.getInstruction()->use_empty();
-      if (CS.paramHasAttr(0, Attributes::SExt))
+      if (CS.paramHasAttr(0, Attribute::SExt))
         MyFlags.Flags.setSExt();
-      if (CS.paramHasAttr(0, Attributes::ZExt))
+      if (CS.paramHasAttr(0, Attribute::ZExt))
         MyFlags.Flags.setZExt();
-      if (CS.paramHasAttr(0, Attributes::InReg))
+      if (CS.paramHasAttr(0, Attribute::InReg))
         MyFlags.Flags.setInReg();
       Ins.push_back(MyFlags);
     }
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 18595a86ea..3deec5cd7f 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -674,7 +674,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
   // function, and use up to 128 bytes of stack space, don't have a frame
   // pointer, calls, or dynamic alloca then we do not need to adjust the
   // stack pointer (we fit in the Red Zone).
-  if (Is64Bit && !Fn->getFnAttributes().hasAttribute(Attributes::NoRedZone) &&
+  if (Is64Bit && !Fn->getFnAttributes().hasAttribute(Attribute::NoRedZone) &&
       !RegInfo->needsStackRealignment(MF) &&
       !MFI->hasVarSizedObjects() &&                     // No dynamic alloca.
       !MFI->adjustsStack() &&                           // No calls.
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 88e10d3af9..acb89c11fc 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -432,7 +432,7 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
 void X86DAGToDAGISel::PreprocessISelDAG() {
   // OptForSize is used in pattern predicates that isel is matching.
   OptForSize = MF->getFunction()->getFnAttributes().
-    hasAttribute(Attributes::OptimizeForSize);
+    hasAttribute(Attribute::OptimizeForSize);
 
   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
        E = CurDAG->allnodes_end(); I != E; ) {
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 42d62b2a72..97f2a355db 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1383,7 +1383,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
                                        MachineFunction &MF) const {
   const Function *F = MF.getFunction();
   if ((!IsMemset || ZeroMemset) &&
-      !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) {
+      !F->getFnAttributes().hasAttribute(Attribute::NoImplicitFloat)) {
     if (Size >= 16 &&
         (Subtarget->isUnalignedMemAccessFast() ||
          ((DstAlign == 0 || DstAlign >= 16) &&
@@ -2066,7 +2066,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
                                         TotalNumIntRegs);
 
     bool NoImplicitFloatOps = Fn->getFnAttributes().
-      hasAttribute(Attributes::NoImplicitFloat);
+      hasAttribute(Attribute::NoImplicitFloat);
     assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
            "SSE register cannot be used when SSE is disabled!");
     assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat &&
@@ -2545,7 +2545,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     } else if (Subtarget->isPICStyleRIPRel() &&
               isa<Function>(GV) &&
               cast<Function>(GV)->getFnAttributes().
-                 hasAttribute(Attributes::NonLazyBind)) {
+                 hasAttribute(Attribute::NonLazyBind)) {
       // If the function is marked as non-lazy, generate an indirect call
       // which loads from the GOT directly. This avoids runtime overhead
       // at the cost of eager binding (and one extra byte of encoding).
@@ -6735,7 +6735,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
   bool HasInt256    = Subtarget->hasInt256();
   MachineFunction &MF = DAG.getMachineFunction();
   bool OptForSize = MF.getFunction()->getFnAttributes().
-    hasAttribute(Attributes::OptimizeForSize);
+    hasAttribute(Attribute::OptimizeForSize);
 
   assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
 
@@ -9892,7 +9892,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
     assert(!getTargetMachine().Options.UseSoftFloat &&
            !(DAG.getMachineFunction()
              .getFunction()->getFnAttributes()
-             .hasAttribute(Attributes::NoImplicitFloat)) &&
+             .hasAttribute(Attribute::NoImplicitFloat)) &&
            Subtarget->hasSSE1());
   }
 
@@ -10741,7 +10741,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
     for (FunctionType::param_iterator I = FTy->param_begin(),
          E = FTy->param_end(); I != E; ++I, ++Idx)
-      if (Attrs.getParamAttributes(Idx).hasAttribute(Attributes::InReg))
+      if (Attrs.getParamAttributes(Idx).hasAttribute(Attribute::InReg))
        // FIXME: should only count parameters that are lowered to integers.
        InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
 
@@ -16159,7 +16159,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
   const Function *F = DAG.getMachineFunction().getFunction();
   bool NoImplicitFloatOps = F->getFnAttributes().
-    hasAttribute(Attributes::NoImplicitFloat);
+    hasAttribute(Attribute::NoImplicitFloat);
   bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps
                    && Subtarget->hasSSE2();
   if ((VT.isVector() ||
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index d02e12fc4f..0363276bc1 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -3863,7 +3863,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   // Unless optimizing for size, don't fold to avoid partial
   // register update stalls
   if (!MF.getFunction()->getFnAttributes().
-        hasAttribute(Attributes::OptimizeForSize) &&
+        hasAttribute(Attribute::OptimizeForSize) &&
       hasPartialRegUpdate(MI->getOpcode()))
     return 0;
 
@@ -3905,7 +3905,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   // Unless optimizing for size, don't fold to avoid partial
   // register update stalls
   if (!MF.getFunction()->getFnAttributes().
-        hasAttribute(Attributes::OptimizeForSize) &&
+        hasAttribute(Attribute::OptimizeForSize) &&
       hasPartialRegUpdate(MI->getOpcode()))
     return 0;
 
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index b396a5ca81..1934b28fed 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -417,7 +417,7 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
   unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
   bool requiresRealignment =
     ((MFI->getMaxAlignment() > StackAlign) ||
-     F->getFnAttributes().hasAttribute(Attributes::StackAlignment));
+     F->getFnAttributes().hasAttribute(Attribute::StackAlignment));
 
   // If we've requested that we force align the stack do so now.
   if (ForceStackAlign)
--
cgit v1.2.3-18-g5258

From 4b977312c7a9c078211de83771b40a1561deb047 Mon Sep 17 00:00:00 2001
From: Elena Demikhovsky
Date: Wed, 19 Dec 2012 07:50:20 +0000
Subject: Optimized load + SIGN_EXTEND patterns in the X86 backend.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170506 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 25 ++++++++++--
 lib/Target/X86/X86InstrSSE.td      | 82 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 103 insertions(+), 4 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 97f2a355db..fee9d932e5 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -15929,10 +15929,13 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
 
   // If this is a vector EXT Load then attempt to optimize it using a
   // shuffle. We need SSSE3 shuffles.
+  // SEXT loads are suppoted starting SSE41.
+  // We generate X86ISD::VSEXT for them.
   // TODO: It is possible to support ZExt by zeroing the undef values
   // during the shuffle phase or after the shuffle.
   if (RegVT.isVector() && RegVT.isInteger() &&
-      Ext == ISD::EXTLOAD && Subtarget->hasSSSE3()) {
+      (Ext == ISD::EXTLOAD && Subtarget->hasSSSE3() ||
+       Ext == ISD::SEXTLOAD && Subtarget->hasSSE41())){
     assert(MemVT != RegVT && "Cannot extend to the same type");
     assert(MemVT.isVector() && "Must load a vector from memory");
 
     unsigned MemSz = MemVT.getSizeInBits();
     assert(RegSz > MemSz && "Register size must be greater than the mem size");
 
+    if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget->hasInt256())
+      return SDValue();
+
     // All sizes must be a power of two.
     if (!isPowerOf2_32(RegSz * MemSz * NumElems))
       return SDValue();
@@ -15964,16 +15970,23 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
     // Calculate the number of scalar loads that we need to perform
     // in order to load our vector from memory.
     unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits();
+    if (Ext == ISD::SEXTLOAD && NumLoads > 1)
+      return SDValue();
+
+    unsigned loadRegZize = RegSz;
+    if (Ext == ISD::SEXTLOAD && RegSz == 256)
+      loadRegZize /= 2;
 
     // Represent our vector as a sequence of elements which are the
     // largest scalar that we can load.
     EVT LoadUnitVecVT = EVT::getVectorVT(*DAG.getContext(), SclrLoadTy,
-      RegSz/SclrLoadTy.getSizeInBits());
+      loadRegZize/SclrLoadTy.getSizeInBits());
 
     // Represent the data using the same element type that is stored in
     // memory. In practice, we ''widen'' MemVT.
-    EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
-                                     RegSz/MemVT.getScalarType().getSizeInBits());
+    EVT WideVecVT =
+          EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
+                           loadRegZize/MemVT.getScalarType().getSizeInBits());
 
     assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
       "Invalid vector type");
@@ -16014,6 +16027,10 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
     SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Res);
     unsigned SizeRatio = RegSz/MemSz;
 
+    if (Ext == ISD::SEXTLOAD) {
+      SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec);
+      return DCI.CombineTo(N, Sext, TF, true);
+    }
     // Redistribute the loaded elements into the different locations.
     SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
     for (unsigned i = 0; i != NumElems; ++i)
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 54032fe97f..521073d0a8 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -5842,6 +5842,31 @@ defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq",
 defm PMOVSXBQ   : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
 defm PMOVZXBQ   : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
 
+let Predicates = [HasAVX2] in {
+  def : Pat<(v8i32 (X86vsmovl (v8i16 (bitconvert (v2i64 (load addr:$src)))))),
+            (VPMOVSXWDYrm addr:$src)>;
+  def : Pat<(v4i64 (X86vsmovl (v4i32 (bitconvert (v2i64 (load addr:$src)))))),
+            (VPMOVSXDQYrm addr:$src)>;
+
+  def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2i64
+                  (scalar_to_vector (loadi64 addr:$src))))))),
+            (VPMOVSXBDYrm addr:$src)>;
+  def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2f64
+                  (scalar_to_vector (loadf64 addr:$src))))))),
+            (VPMOVSXBDYrm addr:$src)>;
+
+  def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2i64
+                  (scalar_to_vector (loadi64 addr:$src))))))),
+            (VPMOVSXWQYrm addr:$src)>;
+  def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2f64
+                  (scalar_to_vector (loadf64 addr:$src))))))),
+            (VPMOVSXWQYrm addr:$src)>;
+
+  def : Pat<(v4i64 (X86vsext (v16i8 (bitconvert (v4i32
+                  (scalar_to_vector (loadi32 addr:$src))))))),
+            (VPMOVSXBQYrm addr:$src)>;
+}
 
 let Predicates = [HasAVX] in {
   // Common patterns involving scalar load
   def : Pat<(int_x86_sse41_pmovsxbq
              (bitconvert (v4i32 (X86vzmovl
                          (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
            (VPMOVSXBQrm addr:$src)>;
 
let Predicates = [UseSSE41] in {
              (bitconvert (v4i32 (X86vzmovl
                          (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
            (PMOVZXBQrm addr:$src)>;
+
+  def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64
+                  (scalar_to_vector (loadi64 addr:$src))))))),
+            (PMOVSXWDrm addr:$src)>;
+  def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64
+                  (scalar_to_vector (loadf64 addr:$src))))))),
+            (PMOVSXWDrm addr:$src)>;
+  def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32
+                  (scalar_to_vector (loadi32 addr:$src))))))),
+            (PMOVSXBDrm addr:$src)>;
+  def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32
+                  (scalar_to_vector (loadi32 addr:$src))))))),
+            (PMOVSXWQrm addr:$src)>;
+  def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32
+                  (scalar_to_vector (extloadi32i16 addr:$src))))))),
+            (PMOVSXBQrm addr:$src)>;
+  def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64
+                  (scalar_to_vector (loadi64 addr:$src))))))),
+            (PMOVSXDQrm addr:$src)>;
+  def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64
+                  (scalar_to_vector (loadf64 addr:$src))))))),
+            (PMOVSXDQrm addr:$src)>;
+  def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64
+                  (scalar_to_vector (loadi64 addr:$src))))))),
+            (PMOVSXBWrm addr:$src)>;
+  def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64
+                  (scalar_to_vector (loadf64 addr:$src))))))),
+            (PMOVSXBWrm addr:$src)>;
 }
 
 let Predicates = [HasAVX2] in {
@@ -5926,6 +5979,35 @@ let Predicates = [HasAVX] in {
             (VPMOVZXDQrm addr:$src)>;
   def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))),
             (VPMOVZXDQrm addr:$src)>;
+
+  def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64
+                  (scalar_to_vector (loadi64 addr:$src))))))),
+            (VPMOVSXWDrm addr:$src)>;
+  def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64
+                  (scalar_to_vector (loadi64 addr:$src))))))),
+            (VPMOVSXDQrm addr:$src)>;
+  def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64
+                  (scalar_to_vector (loadf64 addr:$src))))))),
+            (VPMOVSXWDrm addr:$src)>;
+  def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64
+                  (scalar_to_vector
(loadf64 addr:$src))))))), + (VPMOVSXDQrm addr:$src)>; + def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64 + (scalar_to_vector (loadi64 addr:$src))))))), + (VPMOVSXBWrm addr:$src)>; + def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64 + (scalar_to_vector (loadf64 addr:$src))))))), + (VPMOVSXBWrm addr:$src)>; + + def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32 + (scalar_to_vector (loadi32 addr:$src))))))), + (VPMOVSXBDrm addr:$src)>; + def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32 + (scalar_to_vector (loadi32 addr:$src))))))), + (VPMOVSXWQrm addr:$src)>; + def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32 + (scalar_to_vector (extloadi32i16 addr:$src))))))), + (VPMOVSXBQrm addr:$src)>; } let Predicates = [UseSSE41] in { -- cgit v1.2.3-18-g5258 From 16537418f4309ba0ab89f6b0dbadc77014ca671f Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Wed, 19 Dec 2012 10:12:48 +0000 Subject: X86ISelLowering.cpp: Fix warnings. [-Wlogical-op-parentheses] git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170523 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index fee9d932e5..93ea3721fe 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -15934,8 +15934,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, // TODO: It is possible to support ZExt by zeroing the undef values // during the shuffle phase or after the shuffle. if (RegVT.isVector() && RegVT.isInteger() && - (Ext == ISD::EXTLOAD && Subtarget->hasSSSE3() || - Ext == ISD::SEXTLOAD && Subtarget->hasSSE41())){ + ((Ext == ISD::EXTLOAD && Subtarget->hasSSSE3()) || + (Ext == ISD::SEXTLOAD && Subtarget->hasSSE41()))){ assert(MemVT != RegVT && "Cannot extend to the same type"); assert(MemVT.isVector() && "Must load a vector from memory"); -- cgit v1.2.3-18-g5258 From 0340557fb830e3669c4c48a2cd99d7703bdda452 Mon Sep 17 00:00:00 2001 From: Patrik Hagglund Date: Wed, 19 Dec 2012 11:30:36 +0000 Subject: Change TargetLowering::findRepresentativeClass to take an MVT, instead of EVT. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170532 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 4 ++-- lib/Target/X86/X86ISelLowering.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 93ea3721fe..dd49d77aec 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1479,10 +1479,10 @@ getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, // FIXME: Why this routine is here? Move to RegInfo! 
std::pair<const TargetRegisterClass*, uint8_t> -X86TargetLowering::findRepresentativeClass(EVT VT) const{ +X86TargetLowering::findRepresentativeClass(MVT VT) const{ const TargetRegisterClass *RRC = 0; uint8_t Cost = 1; - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.SimpleTy) { default: return TargetLowering::findRepresentativeClass(VT); case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index c51460bc42..8ac543da4b 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -716,7 +716,7 @@ namespace llvm { protected: std::pair<const TargetRegisterClass*, uint8_t> - findRepresentativeClass(EVT VT) const; + findRepresentativeClass(MVT VT) const; private: /// Subtarget - Keep a pointer to the X86Subtarget around so that we can -- cgit v1.2.3-18-g5258 From dfcf33a287d1756721f1f735af687595ce2f5a21 Mon Sep 17 00:00:00 2001 From: Patrik Hagglund Date: Wed, 19 Dec 2012 11:48:16 +0000 Subject: Change TargetLowering::RegisterTypeForVT to contain MVTs, instead of EVTs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170535 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 32baf4bb76..313bf2232c 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1909,11 +1909,11 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { ComputeValueVTs(TLI, I->getType(), RetTys); for (unsigned i = 0, e = RetTys.size(); i != e; ++i) { EVT VT = RetTys[i]; - EVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT); + MVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT); unsigned NumRegs = TLI.getNumRegisters(I->getParent()->getContext(), VT); for (unsigned j = 0; j != NumRegs; ++j) { ISD::InputArg MyFlags; - MyFlags.VT = RegisterVT.getSimpleVT(); + MyFlags.VT = RegisterVT; MyFlags.Used = !CS.getInstruction()->use_empty(); if (CS.paramHasAttr(0, Attribute::SExt)) MyFlags.Flags.setSExt(); -- cgit v1.2.3-18-g5258 From e5c65911a659e49320d214bf0702793ad37b5ed5 Mon Sep 17 00:00:00 2001 From: Patrik Hagglund Date: Wed, 19 Dec 2012 12:02:25 +0000 Subject: Change TargetLowering::getTypeForExtArgOrReturn to take and return MVTs, instead of EVTs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170537 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 6 +++--- lib/Target/X86/X86ISelLowering.h | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index dd49d77aec..1e64741c25 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1696,8 +1696,8 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { return true; } -EVT -X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT, +MVT +X86TargetLowering::getTypeForExtArgOrReturn(MVT VT, ISD::NodeType ExtendKind) const { MVT ReturnMVT; // TODO: Is this also valid on 32-bit? @@ -1706,7 +1706,7 @@ X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT, else ReturnMVT = MVT::i32; - EVT MinVT = getRegisterType(Context, ReturnMVT); + MVT MinVT = getRegisterType(ReturnMVT); return VT.bitsLT(MinVT) ?
MinVT : VT; } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 8ac543da4b..5be7f095a4 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -867,9 +867,8 @@ namespace llvm { virtual bool mayBeEmittedAsTailCall(CallInst *CI) const; - virtual EVT - getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT, - ISD::NodeType ExtendKind) const; + virtual MVT + getTypeForExtArgOrReturn(MVT VT, ISD::NodeType ExtendKind) const; virtual bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, -- cgit v1.2.3-18-g5258 From 6da2e22dffe9dd0255e10a8934f2879eb7e87868 Mon Sep 17 00:00:00 2001 From: Paul Redmond Date: Wed, 19 Dec 2012 19:47:13 +0000 Subject: Transform (x&C)>V into (x&C)!=0 where possible When the least bit of C is greater than V, (x&C) must be greater than V if it is not zero, so the comparison can be simplified. Although this was suggested in Target/X86/README.txt, it benefits any architecture with a directly testable form of AND. Patch by Kevin Schoedel git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170576 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/README.txt | 37 ------------------------------------- 1 file changed, 37 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index 6a8a4fdf25..b4285a0718 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -1567,43 +1567,6 @@ The first one is done for all AMDs, Core2, and "Generic" The second one is done for: Atom, Pentium Pro, all AMDs, Pentium 4, Nocona, Core 2, and "Generic" -//===---------------------------------------------------------------------===// - -Testcase: -int a(int x) { return (x & 127) > 31; } - -Current output: - movl 4(%esp), %eax - andl $127, %eax - cmpl $31, %eax - seta %al - movzbl %al, %eax - ret - -Ideal output: - xorl %eax, %eax - testl $96, 4(%esp) - setne %al - ret - -This should definitely be done in instcombine, canonicalizing the range -condition into a != condition. We get this IR: - -define i32 @a(i32 %x) nounwind readnone { -entry: - %0 = and i32 %x, 127 ; [#uses=1] - %1 = icmp ugt i32 %0, 31 ; [#uses=1] - %2 = zext i1 %1 to i32 ; [#uses=1] - ret i32 %2 -} - -Instcombine prefers to strength reduce relational comparisons to equality -comparisons when possible, this should be another case of that. This could -be handled pretty easily in InstCombiner::visitICmpInstWithInstAndIntCst, but it -looks like InstCombiner::visitICmpInstWithInstAndIntCst should really already -be redesigned to use ComputeMaskedBits and friends. - - //===---------------------------------------------------------------------===// Testcase: int x(int a) { return (a&0xf0)>>4; } -- cgit v1.2.3-18-g5258 From 759e3fa641d0ad01012d16d913015c9f69c8d2ab Mon Sep 17 00:00:00 2001 From: Roman Divacky Date: Wed, 19 Dec 2012 19:55:47 +0000 Subject: Remove edis - the enhanced disassembler. Fixes PR14654. 
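A side note on the (x & C) > V simplification removed from README.txt in r170576 above: the rule is easy to sanity-check outside of LLVM. A minimal standalone C++ check, illustrative only; the constants 96 and 31 come from the deleted README example, everything else is hypothetical:

// Illustrative only -- not part of r170576. Brute-force check of the rule:
// if the lowest set bit of C exceeds V, then any nonzero value of (x & C)
// is at least that bit, so (x & C) > V holds exactly when (x & C) != 0.
#include <cassert>
#include <cstdint>
#include <cstdio>

static bool lowestSetBitExceeds(uint32_t C, uint32_t V) {
  return C != 0 && (C & (0u - C)) > V;
}

int main() {
  const uint32_t C = 96, V = 31; // lowest set bit of 96 is 32, and 32 > 31
  assert(lowestSetBitExceeds(C, V));
  for (uint32_t x = 0; x < (1u << 20); ++x)
    assert(((x & C) > V) == ((x & C) != 0));
  std::printf("(x & 96) > 31 is equivalent to (x & 96) != 0\n");
  return 0;
}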
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170578 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/CMakeLists.txt | 1 - lib/Target/X86/Disassembler/X86Disassembler.cpp | 6 ------ lib/Target/X86/Disassembler/X86Disassembler.h | 4 ---- lib/Target/X86/Makefile | 3 +-- 4 files changed, 1 insertion(+), 13 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index f4d03a602c..19912cc6bc 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -10,7 +10,6 @@ tablegen(LLVM X86GenDAGISel.inc -gen-dag-isel) tablegen(LLVM X86GenFastISel.inc -gen-fast-isel) tablegen(LLVM X86GenCallingConv.inc -gen-callingconv) tablegen(LLVM X86GenSubtargetInfo.inc -gen-subtarget) -tablegen(LLVM X86GenEDInfo.inc -gen-enhanced-disassembly-info) add_public_tablegen_target(X86CommonTableGen) set(sources diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index ed61c01130..ca6f80ce3e 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -16,7 +16,6 @@ #include "X86Disassembler.h" #include "X86DisassemblerDecoder.h" -#include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCExpr.h" @@ -32,7 +31,6 @@ #include "X86GenRegisterInfo.inc" #define GET_INSTRINFO_ENUM #include "X86GenInstrInfo.inc" -#include "X86GenEDInfo.inc" using namespace llvm; using namespace llvm::X86Disassembler; @@ -83,10 +81,6 @@ X86GenericDisassembler::~X86GenericDisassembler() { delete MII; } -const EDInstInfo *X86GenericDisassembler::getEDInfo() const { - return instInfoX86; -} - /// regionReader - a callback function that wraps the readByte method from /// MemoryObject. /// diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h index 981701f527..b92427a7e9 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.h +++ b/lib/Target/X86/Disassembler/X86Disassembler.h @@ -95,8 +95,6 @@ class MCSubtargetInfo; class MemoryObject; class raw_ostream; -struct EDInstInfo; - namespace X86Disassembler { /// X86GenericDisassembler - Generic disassembler for all X86 platforms. @@ -122,8 +120,6 @@ public: raw_ostream &vStream, raw_ostream &cStream) const; - /// getEDInfo - See MCDisassembler. - const EDInstInfo *getEDInfo() const; private: DisassemblerMode fMode; }; diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile index 949661eb99..e518fecf04 100644 --- a/lib/Target/X86/Makefile +++ b/lib/Target/X86/Makefile @@ -16,8 +16,7 @@ BUILT_SOURCES = X86GenRegisterInfo.inc X86GenInstrInfo.inc \ X86GenAsmWriter.inc X86GenAsmMatcher.inc \ X86GenAsmWriter1.inc X86GenDAGISel.inc \ X86GenDisassemblerTables.inc X86GenFastISel.inc \ - X86GenCallingConv.inc X86GenSubtargetInfo.inc \ - X86GenEDInfo.inc + X86GenCallingConv.inc X86GenSubtargetInfo.inc DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc Utils -- cgit v1.2.3-18-g5258 From 37a942cd52725b1d390989a8267a764b42fcb5d3 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 19 Dec 2012 21:31:56 +0000 Subject: Remove the explicit MachineInstrBuilder(MI) constructor. Use the version that also takes an MF reference instead. It would technically be possible to extract an MF reference from the MI as MI->getParent()->getParent(), but that would not work for MIs that are not inserted into any basic block. 
Given the reasonably small number of places this constructor was used at all, I preferred the compile time check to a run time assertion. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170588 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 0363276bc1..81149b702d 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -3525,43 +3525,44 @@ optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI, /// to: /// %xmm4 = PXORrr %xmm4, %xmm4 /// -static bool Expand2AddrUndef(MachineInstr *MI, const MCInstrDesc &Desc) { +static bool Expand2AddrUndef(MachineInstrBuilder &MIB, + const MCInstrDesc &Desc) { assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction."); - unsigned Reg = MI->getOperand(0).getReg(); - MI->setDesc(Desc); + unsigned Reg = MIB->getOperand(0).getReg(); + MIB->setDesc(Desc); // MachineInstr::addOperand() will insert explicit operands before any // implicit operands. - MachineInstrBuilder(MI).addReg(Reg, RegState::Undef) - .addReg(Reg, RegState::Undef); + MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef); // But we don't trust that. - assert(MI->getOperand(1).getReg() == Reg && - MI->getOperand(2).getReg() == Reg && "Misplaced operand"); + assert(MIB->getOperand(1).getReg() == Reg && + MIB->getOperand(2).getReg() == Reg && "Misplaced operand"); return true; } bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { bool HasAVX = TM.getSubtarget().hasAVX(); + MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); switch (MI->getOpcode()) { case X86::SETB_C8r: - return Expand2AddrUndef(MI, get(X86::SBB8rr)); + return Expand2AddrUndef(MIB, get(X86::SBB8rr)); case X86::SETB_C16r: - return Expand2AddrUndef(MI, get(X86::SBB16rr)); + return Expand2AddrUndef(MIB, get(X86::SBB16rr)); case X86::SETB_C32r: - return Expand2AddrUndef(MI, get(X86::SBB32rr)); + return Expand2AddrUndef(MIB, get(X86::SBB32rr)); case X86::SETB_C64r: - return Expand2AddrUndef(MI, get(X86::SBB64rr)); + return Expand2AddrUndef(MIB, get(X86::SBB64rr)); case X86::V_SET0: case X86::FsFLD0SS: case X86::FsFLD0SD: - return Expand2AddrUndef(MI, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr)); + return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr)); case X86::AVX_SET0: assert(HasAVX && "AVX not supported"); - return Expand2AddrUndef(MI, get(X86::VXORPSYrr)); + return Expand2AddrUndef(MIB, get(X86::VXORPSYrr)); case X86::V_SETALLONES: - return Expand2AddrUndef(MI, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr)); + return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr)); case X86::AVX2_SETALLONES: - return Expand2AddrUndef(MI, get(X86::VPCMPEQDYrr)); + return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr)); case X86::TEST8ri_NOREX: MI->setDesc(get(X86::TEST8ri)); return true; @@ -3587,9 +3588,10 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, MachineInstr *MI, const TargetInstrInfo &TII) { // Create the base instruction with the memory operand as the first part. + // Omit the implicit operands, something BuildMI can't do. 
MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), MI->getDebugLoc(), true); - MachineInstrBuilder MIB(NewMI); + MachineInstrBuilder MIB(MF, NewMI); unsigned NumAddrOps = MOs.size(); for (unsigned i = 0; i != NumAddrOps; ++i) MIB.addOperand(MOs[i]); @@ -3613,9 +3615,10 @@ static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode, unsigned OpNo, const SmallVectorImpl &MOs, MachineInstr *MI, const TargetInstrInfo &TII) { + // Omit the implicit operands, something BuildMI can't do. MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), MI->getDebugLoc(), true); - MachineInstrBuilder MIB(NewMI); + MachineInstrBuilder MIB(MF, NewMI); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); @@ -4155,7 +4158,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, // Emit the data processing instruction. MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI->getDebugLoc(), true); - MachineInstrBuilder MIB(DataMI); + MachineInstrBuilder MIB(MF, DataMI); if (FoldedStore) MIB.addReg(Reg, RegState::Define); -- cgit v1.2.3-18-g5258 From fbf3b4a07690751f72302757058ab0298dfb832e Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Wed, 19 Dec 2012 23:38:53 +0000 Subject: MC: Add MCInstrDesc::mayAffectControlFlow() method. MC disassembler clients (LLDB) are interested in querying if an instruction may affect control flow other than by virtue of being an explicit branch instruction. For example, instructions which write directly to the PC on some architectures. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170610 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 3 ++- lib/Target/X86/X86RegisterInfo.cpp | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index f66b203f0d..5e84530cd7 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -257,7 +257,8 @@ static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) { MCRegisterInfo *X = new MCRegisterInfo(); InitX86MCRegisterInfo(X, RA, X86_MC::getDwarfRegFlavour(TT, false), - X86_MC::getDwarfRegFlavour(TT, true)); + X86_MC::getDwarfRegFlavour(TT, true), + RA); X86_MC::InitLLVM2SEHRegisterMapping(X); return X; } diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 1934b28fed..6b36101a12 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -56,10 +56,12 @@ EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true), X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii) - : X86GenRegisterInfo(tm.getSubtarget().is64Bit() - ? X86::RIP : X86::EIP, + : X86GenRegisterInfo((tm.getSubtarget().is64Bit() + ? X86::RIP : X86::EIP), X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), false), - X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), true)), + X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), true), + (tm.getSubtarget().is64Bit() + ? X86::RIP : X86::EIP)), TM(tm), TII(tii) { X86_MC::InitLLVM2SEHRegisterMapping(this); -- cgit v1.2.3-18-g5258 From ba836a2e803e51cc26279a8522f05c7452729fe3 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Thu, 20 Dec 2012 04:04:17 +0000 Subject: Fix use-before-construction of X86TargetLowering. 
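The fix works because C++ constructs non-static data members in declaration order and ignores the order of the mem-initializer list, so a member whose constructor consumes another member must be declared after it. A minimal standalone illustration with hypothetical stand-in types, not the LLVM classes:

// Illustrative only: hypothetical stand-ins, not the LLVM classes. Members
// are constructed in declaration order; the mem-initializer-list order is
// ignored, so a member whose constructor reads another member must be
// declared after it.
#include <cassert>

struct Lowering {
  int Flag;
  Lowering() : Flag(42) {}
};

struct DAGInfo {
  int Copied;
  explicit DAGInfo(const Lowering &L) : Copied(L.Flag) {} // reads Lowering
};

struct Machine {
  Lowering TLInfo; // must be declared before DAGInfo...
  DAGInfo TSInfo;  // ...which consumes it during construction
  Machine() : TLInfo(), TSInfo(TLInfo) {} // list order alone would not help
};

int main() {
  Machine M;
  assert(M.TSInfo.Copied == 42); // swapping the declarations breaks this
  return 0;
}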
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170654 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetMachine.cpp | 4 ++-- lib/Target/X86/X86TargetMachine.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index b7a79563b2..ea99796f35 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -46,8 +46,8 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-" "n8:16:32-S128"), InstrInfo(*this), - TSInfo(*this), TLInfo(*this), + TSInfo(*this), JITInfo(*this), STTI(&TLInfo), VTTI(&TLInfo) { } @@ -63,8 +63,8 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, DL("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" "n8:16:32:64-S128"), InstrInfo(*this), - TSInfo(*this), TLInfo(*this), + TSInfo(*this), JITInfo(*this), STTI(&TLInfo), VTTI(&TLInfo){ } diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 792f721e76..57ff1f0d7b 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -78,8 +78,8 @@ class X86_32TargetMachine : public X86TargetMachine { virtual void anchor(); const DataLayout DL; // Calculates type size & alignment X86InstrInfo InstrInfo; - X86SelectionDAGInfo TSInfo; X86TargetLowering TLInfo; + X86SelectionDAGInfo TSInfo; X86JITInfo JITInfo; ScalarTargetTransformImpl STTI; X86VectorTargetTransformInfo VTTI; @@ -115,8 +115,8 @@ class X86_64TargetMachine : public X86TargetMachine { virtual void anchor(); const DataLayout DL; // Calculates type size & alignment X86InstrInfo InstrInfo; - X86SelectionDAGInfo TSInfo; X86TargetLowering TLInfo; + X86SelectionDAGInfo TSInfo; X86JITInfo JITInfo; X86ScalarTargetTransformImpl STTI; X86VectorTargetTransformInfo VTTI; -- cgit v1.2.3-18-g5258 From 6af228a92a7b8414fa3c1b3c37ee659d32e66e1b Mon Sep 17 00:00:00 2001 From: Roman Divacky Date: Thu, 20 Dec 2012 14:43:30 +0000 Subject: Remove MCTargetAsmLexer and its derived classes now that edis, its only user, is gone. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170699 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/CMakeLists.txt | 1 - lib/Target/X86/AsmParser/X86AsmLexer.cpp | 159 ------------------------------ lib/Target/X86/AsmParser/X86AsmParser.cpp | 4 - 3 files changed, 164 deletions(-) delete mode 100644 lib/Target/X86/AsmParser/X86AsmLexer.cpp (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/AsmParser/CMakeLists.txt b/lib/Target/X86/AsmParser/CMakeLists.txt index 47489bb06c..54204d4b63 100644 --- a/lib/Target/X86/AsmParser/CMakeLists.txt +++ b/lib/Target/X86/AsmParser/CMakeLists.txt @@ -1,7 +1,6 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) add_llvm_library(LLVMX86AsmParser - X86AsmLexer.cpp X86AsmParser.cpp ) diff --git a/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/lib/Target/X86/AsmParser/X86AsmLexer.cpp deleted file mode 100644 index b12399d447..0000000000 --- a/lib/Target/X86/AsmParser/X86AsmLexer.cpp +++ /dev/null @@ -1,159 +0,0 @@ -//===-- X86AsmLexer.cpp - Tokenize X86 assembly to AsmTokens --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/X86BaseInfo.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCParser/MCAsmLexer.h" -#include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "llvm/MC/MCTargetAsmLexer.h" -#include "llvm/Support/TargetRegistry.h" - -using namespace llvm; - -namespace { - -class X86AsmLexer : public MCTargetAsmLexer { - const MCAsmInfo &AsmInfo; - - bool tentativeIsValid; - AsmToken tentativeToken; - - const AsmToken &lexTentative() { - tentativeToken = getLexer()->Lex(); - tentativeIsValid = true; - return tentativeToken; - } - - const AsmToken &lexDefinite() { - if (tentativeIsValid) { - tentativeIsValid = false; - return tentativeToken; - } - return getLexer()->Lex(); - } - - AsmToken LexTokenATT(); - AsmToken LexTokenIntel(); -protected: - AsmToken LexToken() { - if (!Lexer) { - SetError(SMLoc(), "No MCAsmLexer installed"); - return AsmToken(AsmToken::Error, "", 0); - } - - switch (AsmInfo.getAssemblerDialect()) { - default: - SetError(SMLoc(), "Unhandled dialect"); - return AsmToken(AsmToken::Error, "", 0); - case 0: - return LexTokenATT(); - case 1: - return LexTokenIntel(); - } - } -public: - X86AsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI) - : MCTargetAsmLexer(T), AsmInfo(MAI), tentativeIsValid(false) { - } -}; - -} // end anonymous namespace - -#define GET_REGISTER_MATCHER -#include "X86GenAsmMatcher.inc" - -AsmToken X86AsmLexer::LexTokenATT() { - AsmToken lexedToken = lexDefinite(); - - switch (lexedToken.getKind()) { - default: - return lexedToken; - case AsmToken::Error: - SetError(Lexer->getErrLoc(), Lexer->getErr()); - return lexedToken; - - case AsmToken::Percent: { - const AsmToken &nextToken = lexTentative(); - if (nextToken.getKind() != AsmToken::Identifier) - return lexedToken; - - if (unsigned regID = MatchRegisterName(nextToken.getString())) { - lexDefinite(); - - // FIXME: This is completely wrong when there is a space or other - // punctuation between the % and the register name. - StringRef regStr(lexedToken.getString().data(), - lexedToken.getString().size() + - nextToken.getString().size()); - - return AsmToken(AsmToken::Register, regStr, - static_cast(regID)); - } - - // Match register name failed. If this is "db[0-7]", match it as an alias - // for dr[0-7]. - if (nextToken.getString().size() == 3 && - nextToken.getString().startswith("db")) { - int RegNo = -1; - switch (nextToken.getString()[2]) { - case '0': RegNo = X86::DR0; break; - case '1': RegNo = X86::DR1; break; - case '2': RegNo = X86::DR2; break; - case '3': RegNo = X86::DR3; break; - case '4': RegNo = X86::DR4; break; - case '5': RegNo = X86::DR5; break; - case '6': RegNo = X86::DR6; break; - case '7': RegNo = X86::DR7; break; - } - - if (RegNo != -1) { - lexDefinite(); - - // FIXME: This is completely wrong when there is a space or other - // punctuation between the % and the register name. 
- StringRef regStr(lexedToken.getString().data(), - lexedToken.getString().size() + - nextToken.getString().size()); - return AsmToken(AsmToken::Register, regStr, - static_cast(RegNo)); - } - } - - - return lexedToken; - } - } -} - -AsmToken X86AsmLexer::LexTokenIntel() { - const AsmToken &lexedToken = lexDefinite(); - - switch(lexedToken.getKind()) { - default: - return lexedToken; - case AsmToken::Error: - SetError(Lexer->getErrLoc(), Lexer->getErr()); - return lexedToken; - case AsmToken::Identifier: { - unsigned regID = MatchRegisterName(lexedToken.getString().lower()); - - if (regID) - return AsmToken(AsmToken::Register, - lexedToken.getString(), - static_cast(regID)); - return lexedToken; - } - } -} - -extern "C" void LLVMInitializeX86AsmLexer() { - RegisterMCAsmLexer X(TheX86_32Target); - RegisterMCAsmLexer Y(TheX86_64Target); -} diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index fdb7583ed0..cbdaeffc44 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -2059,14 +2059,10 @@ bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) { return false; } - -extern "C" void LLVMInitializeX86AsmLexer(); - // Force static initialization. extern "C" void LLVMInitializeX86AsmParser() { RegisterMCAsmParser X(TheX86_32Target); RegisterMCAsmParser Y(TheX86_64Target); - LLVMInitializeX86AsmLexer(); } #define GET_REGISTER_MATCHER -- cgit v1.2.3-18-g5258 From be06aacaa9a270384599bbfa850b967e9996b9fb Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Thu, 20 Dec 2012 22:54:02 +0000 Subject: Add an MF argument to MI::copyImplicitOps(). This function is often used to decorate dangling instructions, so a context reference is required to allocate memory for the operands. Also add a corresponding MachineInstrBuilder method. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170797 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 3deec5cd7f..1d5457297d 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1138,7 +1138,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } MachineInstr *NewMI = prior(MBBI); - NewMI->copyImplicitOps(MBBI); + NewMI->copyImplicitOps(MF, MBBI); // Delete the pseudo instruction TCRETURN. MBB.erase(MBBI); -- cgit v1.2.3-18-g5258 From f5637c399711e37287e01f9d9ca9ce7cd2f3d14f Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 21 Dec 2012 01:33:59 +0000 Subject: Improve the X86 cost model for loads and stores. 
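The rule this patch encodes is small enough to restate in isolation. A rough standalone sketch, illustrative only, with hypothetical names and parameters; the authoritative logic is the getMemoryOpCost diff below:

// Illustrative sketch only; hypothetical helper, not the LLVM API. Restates
// the rule added below: one unit per legalized register, doubled for
// >128-bit accesses when AVX2 is absent (256-bit load/store is double
// pumped on Sandy Bridge, but not on Haswell).
#include <cassert>

static unsigned memoryOpCost(unsigned NumLegalizedParts, unsigned AccessBits,
                             bool HasAVX2) {
  unsigned Cost = NumLegalizedParts; // each load/store unit costs 1
  if (AccessBits > 128 && !HasAVX2)
    Cost *= 2;                       // double-pumped 256-bit access
  return Cost;
}

int main() {
  assert(memoryOpCost(1, 128, false) == 1); // <4 x i32>, any SSE/AVX chip
  assert(memoryOpCost(1, 256, false) == 2); // <8 x i32>, AVX (Sandy Bridge)
  assert(memoryOpCost(1, 256, true)  == 1); // <8 x i32>, AVX2 (Haswell)
  return 0;
}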
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170830 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 24 ++++++++++++++++++++++++ lib/Target/X86/X86ISelLowering.h | 4 ++++ 2 files changed, 28 insertions(+) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1e64741c25..6b650726b6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -17818,6 +17818,30 @@ X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode, return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty); } + +unsigned +X86VectorTargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const { + // Legalize the type. + std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Src); + assert(Opcode == Instruction::Load || Opcode == Instruction::Store && "Invalid Opcode"); + + const X86Subtarget &ST = + TLI->getTargetMachine().getSubtarget<X86Subtarget>(); + + // Each load/store unit costs 1. + unsigned Cost = LT.first * 1; + + // On Sandybridge 256bit load/stores are double pumped + // (but not on Haswell). + if (LT.second.getSizeInBits() > 128 && !ST.hasAVX2()) + Cost*=2; + + return Cost; +} + unsigned X86VectorTargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const { diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 5be7f095a4..72cd3b3f5b 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -953,6 +953,10 @@ namespace llvm { virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; + virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const; + virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const; -- cgit v1.2.3-18-g5258 From 042a9a2666690d0170964df3d0b042b7bc4651d5 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 21 Dec 2012 05:02:12 +0000 Subject: Add a missing "virtual" keyword. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170842 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 72cd3b3f5b..3d234eba05 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -960,8 +960,8 @@ namespace llvm { virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const; - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const; + virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const; virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const; -- cgit v1.2.3-18-g5258 From 739c7a83e16e7daaf22cfa4ae84e8d1cc0260941 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 21 Dec 2012 14:04:55 +0000 Subject: X86: Match the SSE/AVX min/max vector ops using a custom node instead of intrinsics This is very mechanical, no functionality change. Preparation for PR14667.
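For reference, the new X86ISD::UMIN/UMAX/SMIN/SMAX nodes denote plain lane-wise unsigned/signed min and max, and a follow-up in this series (r170908, below) pattern-matches vselect(setcc(x, y), x, y) onto the same nodes. A tiny scalar model of the per-lane semantics, illustrative only:

// Illustrative only: the per-lane semantics the new nodes denote, written
// as scalars. X86ISD::UMIN/UMAX use unsigned compares, SMIN/SMAX signed;
// e.g. pminub applies umin8 across all 16 byte lanes of an XMM register.
#include <cassert>
#include <cstdint>

static uint8_t umin8(uint8_t a, uint8_t b) { return a < b ? a : b; }
static int16_t smax16(int16_t a, int16_t b) { return a > b ? a : b; }

int main() {
  assert(umin8(0xFF, 0x01) == 0x01); // unsigned: 0xFF is large, not -1
  assert(smax16(-1, 1) == 1);        // signed: -1 loses to 1
  return 0;
}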
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170898 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 38 +++++++ lib/Target/X86/X86ISelLowering.h | 6 + lib/Target/X86/X86InstrFragmentsSIMD.td | 5 + lib/Target/X86/X86InstrInfo.cpp | 24 ++++ lib/Target/X86/X86InstrSSE.td | 195 ++++++++++++++++---------------- 5 files changed, 171 insertions(+), 97 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6b650726b6..6f12d6cff0 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -10154,6 +10154,40 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { Op.getOperand(1), Op.getOperand(2)); } + // SSE2/SSE41/AVX2 integer max/min intrinsics. + case Intrinsic::x86_sse2_pmaxu_b: + case Intrinsic::x86_sse41_pmaxuw: + case Intrinsic::x86_sse41_pmaxud: + case Intrinsic::x86_avx2_pmaxu_b: + case Intrinsic::x86_avx2_pmaxu_w: + case Intrinsic::x86_avx2_pmaxu_d: + return DAG.getNode(X86ISD::UMAX, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse2_pminu_b: + case Intrinsic::x86_sse41_pminuw: + case Intrinsic::x86_sse41_pminud: + case Intrinsic::x86_avx2_pminu_b: + case Intrinsic::x86_avx2_pminu_w: + case Intrinsic::x86_avx2_pminu_d: + return DAG.getNode(X86ISD::UMIN, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse41_pmaxsb: + case Intrinsic::x86_sse2_pmaxs_w: + case Intrinsic::x86_sse41_pmaxsd: + case Intrinsic::x86_avx2_pmaxs_b: + case Intrinsic::x86_avx2_pmaxs_w: + case Intrinsic::x86_avx2_pmaxs_d: + return DAG.getNode(X86ISD::SMAX, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse41_pminsb: + case Intrinsic::x86_sse2_pmins_w: + case Intrinsic::x86_sse41_pminsd: + case Intrinsic::x86_avx2_pmins_b: + case Intrinsic::x86_avx2_pmins_w: + case Intrinsic::x86_avx2_pmins_d: + return DAG.getNode(X86ISD::SMIN, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + // AVX2 variable shift intrinsics case Intrinsic::x86_avx2_psllv_d: case Intrinsic::x86_avx2_psllv_q: @@ -11974,6 +12008,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::HSUB: return "X86ISD::HSUB"; case X86ISD::FHADD: return "X86ISD::FHADD"; case X86ISD::FHSUB: return "X86ISD::FHSUB"; + case X86ISD::UMAX: return "X86ISD::UMAX"; + case X86ISD::UMIN: return "X86ISD::UMIN"; + case X86ISD::SMAX: return "X86ISD::SMAX"; + case X86ISD::SMIN: return "X86ISD::SMIN"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMIN: return "X86ISD::FMIN"; case X86ISD::FMAXC: return "X86ISD::FMAXC"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 3d234eba05..eec1f7e720 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -197,6 +197,12 @@ namespace llvm { /// FHSUB - Floating point horizontal sub. FHSUB, + /// UMAX, UMIN - Unsigned integer max and min. + UMAX, UMIN, + + /// SMAX, SMIN - Signed integer max and min. + SMAX, SMIN, + /// FMAX, FMIN - Floating point max and min. 
/// FMAX, FMIN, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 7d16d2741d..7025e93fa1 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -27,6 +27,11 @@ def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisFP<1>, SDTCisVT<3, i8>]>; +def X86umin : SDNode<"X86ISD::UMIN", SDTIntBinOp>; +def X86umax : SDNode<"X86ISD::UMAX", SDTIntBinOp>; +def X86smin : SDNode<"X86ISD::SMIN", SDTIntBinOp>; +def X86smax : SDNode<"X86ISD::SMAX", SDTIntBinOp>; + def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 81149b702d..7bbab3862d 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -775,6 +775,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 }, { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 }, { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 }, + { X86::PMINSBrr, X86::PMINSBrm, TB_ALIGN_16 }, + { X86::PMINSDrr, X86::PMINSDrm, TB_ALIGN_16 }, + { X86::PMINUDrr, X86::PMINUDrm, TB_ALIGN_16 }, + { X86::PMINUWrr, X86::PMINUWrm, TB_ALIGN_16 }, + { X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 }, + { X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 }, + { X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 }, + { X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 }, { X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 }, { X86::PMULHRSWrr128, X86::PMULHRSWrm128, TB_ALIGN_16 }, { X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 }, @@ -951,6 +959,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VPMAXUBrr, X86::VPMAXUBrm, TB_ALIGN_16 }, { X86::VPMINSWrr, X86::VPMINSWrm, TB_ALIGN_16 }, { X86::VPMINUBrr, X86::VPMINUBrm, TB_ALIGN_16 }, + { X86::VPMINSBrr, X86::VPMINSBrm, TB_ALIGN_16 }, + { X86::VPMINSDrr, X86::VPMINSDrm, TB_ALIGN_16 }, + { X86::VPMINUDrr, X86::VPMINUDrm, TB_ALIGN_16 }, + { X86::VPMINUWrr, X86::VPMINUWrm, TB_ALIGN_16 }, + { X86::VPMAXSBrr, X86::VPMAXSBrm, TB_ALIGN_16 }, + { X86::VPMAXSDrr, X86::VPMAXSDrm, TB_ALIGN_16 }, + { X86::VPMAXUDrr, X86::VPMAXUDrm, TB_ALIGN_16 }, + { X86::VPMAXUWrr, X86::VPMAXUWrm, TB_ALIGN_16 }, { X86::VPMULDQrr, X86::VPMULDQrm, TB_ALIGN_16 }, { X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, TB_ALIGN_16 }, { X86::VPMULHUWrr, X86::VPMULHUWrm, TB_ALIGN_16 }, @@ -1092,6 +1108,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VPMAXUBYrr, X86::VPMAXUBYrm, TB_ALIGN_32 }, { X86::VPMINSWYrr, X86::VPMINSWYrm, TB_ALIGN_32 }, { X86::VPMINUBYrr, X86::VPMINUBYrm, TB_ALIGN_32 }, + { X86::VPMINSBYrr, X86::VPMINSBYrm, TB_ALIGN_32 }, + { X86::VPMINSDYrr, X86::VPMINSDYrm, TB_ALIGN_32 }, + { X86::VPMINUDYrr, X86::VPMINUDYrm, TB_ALIGN_32 }, + { X86::VPMINUWYrr, X86::VPMINUWYrm, TB_ALIGN_32 }, + { X86::VPMAXSBYrr, X86::VPMAXSBYrm, TB_ALIGN_32 }, + { X86::VPMAXSDYrr, X86::VPMAXSDYrm, TB_ALIGN_32 }, + { X86::VPMAXUDYrr, X86::VPMAXUDYrm, TB_ALIGN_32 }, + { X86::VPMAXUWYrr, X86::VPMAXUWYrm, TB_ALIGN_32 }, { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, TB_ALIGN_32 }, { X86::VPMULDQYrr, X86::VPMULDQYrm, TB_ALIGN_32 }, { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, TB_ALIGN_32 }, diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 521073d0a8..69e2b4390f 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3733,6 +3733,14 @@ defm VPSUBUSW : PDI_binop_rm<0xD9, "vpsubusw", X86subus, v8i16, 
VR128, defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128, memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V; +defm VPMINUB : PDI_binop_rm<0xDA, "vpminub", X86umin, v16i8, VR128, memopv2i64, + i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; +defm VPMINSW : PDI_binop_rm<0xEA, "vpminsw", X86smin, v8i16, VR128, memopv2i64, + i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; +defm VPMAXUB : PDI_binop_rm<0xDE, "vpmaxub", X86umax, v16i8, VR128, memopv2i64, + i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; +defm VPMAXSW : PDI_binop_rm<0xEE, "vpmaxsw", X86smax, v8i16, VR128, memopv2i64, + i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; // Intrinsic forms defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, @@ -3768,18 +3776,6 @@ defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, VR128, memopv2i64, i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, VR128, memopv2i64, i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; @@ -3813,6 +3809,18 @@ defm VPSUBUSWY : PDI_binop_rm<0xD9, "vpsubusw", X86subus, v16i16, VR256, defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32, VR256, memopv4i64, i256mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; +defm VPMINUBY : PDI_binop_rm<0xDA, "vpminub", X86umin, v32i8, + VR256, memopv4i64, i256mem, + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; +defm VPMINSWY : PDI_binop_rm<0xEA, "vpminsw", X86smin, v16i16, + VR256, memopv4i64, i256mem, + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; +defm VPMAXUBY : PDI_binop_rm<0xDE, "vpmaxub", X86umax, v32i8, + VR256, memopv4i64, i256mem, + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; +defm VPMAXSWY : PDI_binop_rm<0xEE, "vpmaxsw", X86smax, v16i16, + VR256, memopv4i64, i256mem, + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; // Intrinsic forms defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b, @@ -3848,18 +3856,6 @@ defm VPAVGBY : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_avx2_pavg_b, defm VPAVGWY : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_avx2_pavg_w, VR256, memopv4i64, i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMINUBY : PDI_binop_rm_int<0xDA, "vpminub", int_x86_avx2_pminu_b, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMINSWY : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_avx2_pmins_w, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMAXUBY : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_avx2_pmaxu_b, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMAXSWY : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_avx2_pmaxs_w, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPSADBWY : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw, VR256, memopv4i64, i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; @@ -3890,6 +3886,14 @@ defm PSUBUSW : 
PDI_binop_rm<0xD9, "psubusw", X86subus, v8i16, VR128, memopv2i64, i128mem, SSE_INTALU_ITINS_P>; defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128, memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>; +defm PMINUB : PDI_binop_rm<0xDA, "pminub", X86umin, v16i8, VR128, memopv2i64, + i128mem, SSE_INTALU_ITINS_P, 1>; +defm PMINSW : PDI_binop_rm<0xEA, "pminsw", X86smin, v8i16, VR128, memopv2i64, + i128mem, SSE_INTALU_ITINS_P, 1>; +defm PMAXUB : PDI_binop_rm<0xDE, "pmaxub", X86umax, v16i8, VR128, memopv2i64, + i128mem, SSE_INTALU_ITINS_P, 1>; +defm PMAXSW : PDI_binop_rm<0xEE, "pmaxsw", X86smax, v8i16, VR128, memopv2i64, + i128mem, SSE_INTALU_ITINS_P, 1>; // Intrinsic forms defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b, @@ -3925,18 +3929,6 @@ defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, VR128, memopv2i64, i128mem, SSE_INTALU_ITINS_P, 1>; -defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; -defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; -defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; -defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, VR128, memopv2i64, i128mem, SSE_INTALU_ITINS_P, 1>; @@ -6701,67 +6693,6 @@ multiclass SS41I_binop_rm_int_y opc, string OpcodeStr, (bitconvert (memopv4i64 addr:$src2))))]>, OpSize; } -let Predicates = [HasAVX] in { - let isCommutable = 0 in - defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw, - 0>, VEX_4V; - defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb, - 0>, VEX_4V; - defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd, - 0>, VEX_4V; - defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud, - 0>, VEX_4V; - defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw, - 0>, VEX_4V; - defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb, - 0>, VEX_4V; - defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd, - 0>, VEX_4V; - defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud, - 0>, VEX_4V; - defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw, - 0>, VEX_4V; - defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq, - 0>, VEX_4V; -} - -let Predicates = [HasAVX2] in { - let isCommutable = 0 in - defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw", - int_x86_avx2_packusdw>, VEX_4V, VEX_L; - defm VPMINSB : SS41I_binop_rm_int_y<0x38, "vpminsb", - int_x86_avx2_pmins_b>, VEX_4V, VEX_L; - defm VPMINSD : SS41I_binop_rm_int_y<0x39, "vpminsd", - int_x86_avx2_pmins_d>, VEX_4V, VEX_L; - defm VPMINUD : SS41I_binop_rm_int_y<0x3B, "vpminud", - int_x86_avx2_pminu_d>, VEX_4V, VEX_L; - defm VPMINUW : SS41I_binop_rm_int_y<0x3A, "vpminuw", - int_x86_avx2_pminu_w>, VEX_4V, VEX_L; - defm VPMAXSB : SS41I_binop_rm_int_y<0x3C, "vpmaxsb", - int_x86_avx2_pmaxs_b>, VEX_4V, VEX_L; - defm VPMAXSD : SS41I_binop_rm_int_y<0x3D, "vpmaxsd", - int_x86_avx2_pmaxs_d>, VEX_4V, VEX_L; - defm VPMAXUD : SS41I_binop_rm_int_y<0x3F, "vpmaxud", - int_x86_avx2_pmaxu_d>, VEX_4V, VEX_L; - defm VPMAXUW : SS41I_binop_rm_int_y<0x3E, 
"vpmaxuw", - int_x86_avx2_pmaxu_w>, VEX_4V, VEX_L; - defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq", - int_x86_avx2_pmul_dq>, VEX_4V, VEX_L; -} - -let Constraints = "$src1 = $dst" in { - let isCommutable = 0 in - defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>; - defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>; - defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>; - defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>; - defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>; - defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>; - defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>; - defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>; - defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>; - defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>; -} /// SS48I_binop_rm - Simple SSE41 binary operator. multiclass SS48I_binop_rm opc, string OpcodeStr, SDNode OpNode, @@ -6784,6 +6715,76 @@ multiclass SS48I_binop_rm opc, string OpcodeStr, SDNode OpNode, (bitconvert (memop_frag addr:$src2)))))]>, OpSize; } +let Predicates = [HasAVX] in { + let isCommutable = 0 in + defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw, + 0>, VEX_4V; + defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", X86smin, v4i32, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", X86umin, v4i32, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v8i16, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v16i8, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v4i32, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v4i32, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq, + 0>, VEX_4V; +} + +let Predicates = [HasAVX2] in { + let isCommutable = 0 in + defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw", + int_x86_avx2_packusdw>, VEX_4V, VEX_L; + defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", X86smin, v8i32, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", X86umin, v8i32, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v16i16, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v32i8, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v8i32, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v8i32, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq", + int_x86_avx2_pmul_dq>, VEX_4V, VEX_L; +} + +let Constraints = "$src1 = $dst" in { + let 
isCommutable = 0 in + defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>; + defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128, + memopv2i64, i128mem>; + defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128, + memopv2i64, i128mem>; + defm PMINUD : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128, + memopv2i64, i128mem>; + defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128, + memopv2i64, i128mem>; + defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128, + memopv2i64, i128mem>; + defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128, + memopv2i64, i128mem>; + defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128, + memopv2i64, i128mem>; + defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128, + memopv2i64, i128mem>; + defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>; +} + let Predicates = [HasAVX] in { defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128, memopv2i64, i128mem, 0>, VEX_4V; -- cgit v1.2.3-18-g5258 From 2556c6b4b67c4a45ad90b256d4913f2048d2fba6 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 21 Dec 2012 17:46:58 +0000 Subject: X86: Match pmin/pmax as a target specific dag combine. This occurs during vectorization. Part of PR14667. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170908 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 77 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6f12d6cff0..444163d663 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -14679,6 +14679,76 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// \brief Matches a VSELECT onto min/max or return 0 if the node doesn't match. +static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, + SDValue RHS, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + if (!VT.isVector()) + return 0; + + switch (VT.getSimpleVT().SimpleTy) { + default: return 0; + case MVT::v32i8: + case MVT::v16i16: + case MVT::v8i32: + if (!Subtarget->hasAVX2()) + return 0; + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + if (!Subtarget->hasSSE2()) + return 0; + } + + // SSE2 has only a small subset of the operations. + bool hasUnsigned = Subtarget->hasSSE41() || + (Subtarget->hasSSE2() && VT == MVT::v16i8); + bool hasSigned = Subtarget->hasSSE41() || + (Subtarget->hasSSE2() && VT == MVT::v8i16); + + ISD::CondCode CC = cast(Cond.getOperand(2))->get(); + + // Check for x CC y ? x : y. + if (DAG.isEqualTo(LHS, Cond.getOperand(0)) && + DAG.isEqualTo(RHS, Cond.getOperand(1))) { + switch (CC) { + default: break; + case ISD::SETULT: + case ISD::SETULE: + return hasUnsigned ? X86ISD::UMIN : 0; + case ISD::SETUGT: + case ISD::SETUGE: + return hasUnsigned ? X86ISD::UMAX : 0; + case ISD::SETLT: + case ISD::SETLE: + return hasSigned ? X86ISD::SMIN : 0; + case ISD::SETGT: + case ISD::SETGE: + return hasSigned ? X86ISD::SMAX : 0; + } + // Check for x CC y ? y : x -- a min/max with reversed arms. + } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) && + DAG.isEqualTo(RHS, Cond.getOperand(0))) { + switch (CC) { + default: break; + case ISD::SETULT: + case ISD::SETULE: + return hasUnsigned ? X86ISD::UMAX : 0; + case ISD::SETUGT: + case ISD::SETUGE: + return hasUnsigned ? 
X86ISD::UMIN : 0; + case ISD::SETLT: + case ISD::SETLE: + return hasSigned ? X86ISD::SMAX : 0; + case ISD::SETGT: + case ISD::SETGE: + return hasSigned ? X86ISD::SMIN : 0; + } + } + + return 0; +} + /// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT /// nodes. static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, @@ -15018,6 +15088,13 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, } } + // Try to match a min/max vector operation. + if (!DCI.isBeforeLegalize() && + N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC) + if (unsigned Op = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget)) + return DAG.getNode(Op, DL, N->getValueType(0), LHS, RHS); + + // If we know that this node is legal then we know that it is going to be // matched by one of the SSE/AVX BLEND instructions. These instructions only // depend on the highest bit in each word. Try to use SimplifyDemandedBits -- cgit v1.2.3-18-g5258 From d0696ef8c33b9b2504e89bc0aab2ea99a6c90756 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 21 Dec 2012 23:48:49 +0000 Subject: In some cases, due to scheduling constraints we copy the EFLAGS. The only way to read the eflags is using push and pop. If we don't adjust the stack then we run over the first frame index. This is not something that we want to do, so we have to make sure that our machine function does not copy the flags. If it does then we have to emit the prolog that adjusts the stack. rdar://12896831 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170961 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 20 +++++++++++++++++++- lib/Target/X86/X86InstrInfo.cpp | 2 ++ 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 1d5457297d..cf6c5c86d3 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -625,6 +625,22 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { return CompactUnwindEncoding; } +/// colobbersTheStack - This function checks if any of the users of EFLAGS +/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has +/// to use the stack, and if we don't adjust the stack we clobber the first +/// frame index. +/// See X86InstrInfo::copyPhysReg. +static bool colobbersTheStack(MachineFunction &MF) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + + for (MachineRegisterInfo::reg_iterator ri = MRI.reg_begin(X86::EFLAGS), + re = MRI.reg_end(); ri != re; ++ri) + if (ri->isCopy()) + return true; + + return false; +} + /// emitPrologue - Push callee-saved registers onto the stack, which /// automatically adjust the stack pointer. Adjust the stack pointer to allocate /// space for local variables. Also emit labels used by the exception handler to @@ -673,12 +689,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // If this is x86-64 and the Red Zone is not disabled, if we are a leaf // function, and use up to 128 bytes of stack space, don't have a frame // pointer, calls, or dynamic alloca then we do not need to adjust the - // stack pointer (we fit in the Red Zone). + // stack pointer (we fit in the Red Zone). We also check that we don't + // push and pop from the stack. if (Is64Bit && !Fn->getFnAttributes().hasAttribute(Attribute::NoRedZone) && !RegInfo->needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. 
!MFI->adjustsStack() && // No calls. !IsWin64 && // Win64 has no Red Zone + !colobbersTheStack(MF) && // Don't push and pop. !MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 7bbab3862d..fbfbc29a71 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -2892,6 +2892,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, } // Moving EFLAGS to / from another register requires a push and a pop. + // Notice that we have to adjust the stack if we don't want to clobber the + // first frame index. See X86FrameLowering.cpp - colobbersTheStack. if (SrcReg == X86::EFLAGS) { if (X86::GR64RegClass.contains(DestReg)) { BuildMI(MBB, MI, DL, get(X86::PUSHF64)); -- cgit v1.2.3-18-g5258 From 17347912b46213658074416133396caffd034e0c Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 22 Dec 2012 11:34:28 +0000 Subject: X86: Emit vector sext as shuffle + sra if vpmovsx is not available. Also loosen the SSSE3 dependency a bit, expanded pshufb + psra is still better than scalarized loads. Fixes PR14590. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170984 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 47 +++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 8 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 444163d663..fd883075a2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16043,14 +16043,14 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, ISD::LoadExtType Ext = Ld->getExtensionType(); // If this is a vector EXT Load then attempt to optimize it using a - // shuffle. We need SSSE3 shuffles. - // SEXT loads are suppoted starting SSE41. - // We generate X86ISD::VSEXT for them. + // shuffle. If SSSE3 is not available we may emit an illegal shuffle but the + // expansion is still better than scalar code. + // We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise we'll + // emit a shuffle and a arithmetic shift. // TODO: It is possible to support ZExt by zeroing the undef values // during the shuffle phase or after the shuffle. - if (RegVT.isVector() && RegVT.isInteger() && - ((Ext == ISD::EXTLOAD && Subtarget->hasSSSE3()) || - (Ext == ISD::SEXTLOAD && Subtarget->hasSSE41()))){ + if (RegVT.isVector() && RegVT.isInteger() && Subtarget->hasSSE2() && + (Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD)) { assert(MemVT != RegVT && "Cannot extend to the same type"); assert(MemVT.isVector() && "Must load a vector from memory"); @@ -16143,9 +16143,40 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, unsigned SizeRatio = RegSz/MemSz; if (Ext == ISD::SEXTLOAD) { - SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec); - return DCI.CombineTo(N, Sext, TF, true); + // If we have SSE4.1 we can directly emit a VSEXT node. + if (Subtarget->hasSSE41()) { + SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec); + return DCI.CombineTo(N, Sext, TF, true); + } + + // Otherwise we'll shuffle the small elements in the high bits of the + // larger type and perform an arithmetic shift. If the shift is not legal + // it's better to scalarize. 
+ if (!TLI.isOperationLegalOrCustom(ISD::SRA, RegVT)) + return SDValue(); + + // Redistribute the loaded elements into the different locations. + SmallVector<int,8> ShuffleVec(NumElems * SizeRatio, -1); + for (unsigned i = 0; i != NumElems; ++i) + ShuffleVec[i*SizeRatio + SizeRatio-1] = i; + + SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec, + DAG.getUNDEF(WideVecVT), + &ShuffleVec[0]); + + Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff); + + // Build the arithmetic shift. + unsigned Amt = RegVT.getVectorElementType().getSizeInBits() - + MemVT.getVectorElementType().getSizeInBits(); + SmallVector<SDValue, 8> C(NumElems, + DAG.getConstant(Amt, RegVT.getScalarType())); + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, RegVT, &C[0], C.size()); + Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff, BV); + + return DCI.CombineTo(N, Shuff, TF, true); } + // Redistribute the loaded elements into the different locations. SmallVector<int,8> ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) -- cgit v1.2.3-18-g5258 From 2f8a6cdfa3bc0bfa4532da89e574666c5251cdb5 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 22 Dec 2012 16:07:56 +0000 Subject: X86: Turn mul of <4 x i32> into pmuludq when no SSE4.1 is available. pmuludq is slow, but it turns out that all the unpacking and packing of the scalarized mul is even slower. 10% speedup on loop-vectorized paq8p. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170985 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index fd883075a2..262475e97f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -870,6 +870,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::ADD, MVT::v8i16, Legal); setOperationAction(ISD::ADD, MVT::v4i32, Legal); setOperationAction(ISD::ADD, MVT::v2i64, Legal); + setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v2i64, Custom); setOperationAction(ISD::SUB, MVT::v16i8, Legal); setOperationAction(ISD::SUB, MVT::v8i16, Legal); @@ -11027,17 +11028,43 @@ static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) { static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); // Decompose 256-bit ops into smaller 128-bit ops. if (VT.is256BitVector() && !Subtarget->hasInt256()) return Lower256IntArith(Op, DAG); + SDValue A = Op.getOperand(0); + SDValue B = Op.getOperand(1); + + // Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle. + if (VT == MVT::v4i32) { + assert(Subtarget->hasSSE2() && !Subtarget->hasSSE41() && + "Should not custom lower when pmuldq is available!"); + + // Extract the odd parts. + const int UnpackMask[] = { 1, -1, 3, -1 }; + SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask); + SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask); + + // Multiply the even parts. + SDValue Evens = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, A, B); + // Now multiply odd parts. + SDValue Odds = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, Aodds, Bodds); + + Evens = DAG.getNode(ISD::BITCAST, dl, VT, Evens); + Odds = DAG.getNode(ISD::BITCAST, dl, VT, Odds); + + // Merge the two vectors back together with a shuffle. This expands into 2 + // shuffles.
+ const int ShufMask[] = { 0, 4, 2, 6 }; + return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask); + } + assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Only know how to lower V2I64/V4I64 multiply"); - DebugLoc dl = Op.getDebugLoc(); - // Ahi = psrlqi(a, 32); // Bhi = psrlqi(b, 32); // @@ -11049,9 +11076,6 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, // AhiBlo = psllqi(AhiBlo, 32); // return AloBlo + AloBhi + AhiBlo; - SDValue A = Op.getOperand(0); - SDValue B = Op.getOperand(1); - SDValue ShAmt = DAG.getConstant(32, MVT::i32); SDValue Ahi = DAG.getNode(X86ISD::VSRLI, dl, VT, A, ShAmt); -- cgit v1.2.3-18-g5258 From d54fed27865dcbc69932e1e6c372bb5a932e662a Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sun, 23 Dec 2012 07:23:55 +0000 Subject: Loop Vectorizer: Update the cost model of scatter/gather operations and make them more expensive. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170995 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 262475e97f..b53a023a81 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -17988,7 +17988,6 @@ X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode, return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty); } - unsigned X86VectorTargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, -- cgit v1.2.3-18-g5258 From 677689cf5dc65404645462464682a0696cc84532 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sun, 23 Dec 2012 07:30:09 +0000 Subject: Rename a function. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170996 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index cf6c5c86d3..6345487751 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -625,12 +625,12 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { return CompactUnwindEncoding; } -/// colobbersTheStack - This function checks if any of the users of EFLAGS +/// usesTheStack - This function checks if any of the users of EFLAGS /// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has /// to use the stack, and if we don't adjust the stack we clobber the first /// frame index. -/// See X86InstrInfo::copyPhysReg. -static bool colobbersTheStack(MachineFunction &MF) { +/// See X86InstrInfo::copyPhysReg. +static bool usesTheStack(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); for (MachineRegisterInfo::reg_iterator ri = MRI.reg_begin(X86::EFLAGS), @@ -696,7 +696,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. !IsWin64 && // Win64 has no Red Zone - !colobbersTheStack(MF) && // Don't push and pop. + !usesTheStack(MF) && // Don't push and pop. 
!MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; -- cgit v1.2.3-18-g5258 From 40b04a481d4a1b53a549c99edc730e06b0b14fb4 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sun, 23 Dec 2012 07:33:44 +0000 Subject: whitespace git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170997 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b53a023a81..7eff0468d0 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1239,7 +1239,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); - // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't // handle type legalization for these operations here. // @@ -1314,13 +1313,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setPrefFunctionAlignment(4); // 2^4 bytes. } - EVT X86TargetLowering::getSetCCResultType(EVT VT) const { if (!VT.isVector()) return MVT::i8; return VT.changeVectorElementTypeToInteger(); } - /// getMaxByValAlign - Helper for getByValTypeAlignment to determine /// the desired ByVal argument alignment. static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) { @@ -1525,7 +1522,6 @@ bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace, return true; } - //===----------------------------------------------------------------------===// // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -1773,7 +1769,6 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, return Chain; } - //===----------------------------------------------------------------------===// // C & StdCall & Fast Calling Convention implementation //===----------------------------------------------------------------------===// @@ -2664,7 +2659,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Ins, dl, DAG, InVals); } - //===----------------------------------------------------------------------===// // Fast Calling Convention (tail call) implementation //===----------------------------------------------------------------------===// @@ -2973,7 +2967,6 @@ X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, return X86::createFastISel(funcInfo, libInfo); } - //===----------------------------------------------------------------------===// // Other Lowering Hooks //===----------------------------------------------------------------------===// @@ -3084,7 +3077,6 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); } - bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, bool hasSymbolicDisplacement) { // Offset should fit into 32 bit immediate field. 
@@ -6997,7 +6989,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1, getShuffleCLImmediate(SVOp), DAG); - //===--------------------------------------------------------------------===// // Since no target specific shuffle was selected for this generic one, // lower it into other known shuffles. FIXME: this isn't true yet, but @@ -7099,7 +7090,6 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, return SDValue(); } - SDValue X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { @@ -7464,7 +7454,6 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); - // With PIC, the address is actually $g + Offset. if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && !Subtarget->is64Bit()) { @@ -7851,7 +7840,6 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("TLS not implemented for this target."); } - /// LowerShiftParts - Lower SRA_PARTS and friends, which return two i32 values /// and take a 2 x i32 value to shift plus a shift amount. SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{ @@ -9076,7 +9064,6 @@ static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) { DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC)); } - SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue Cond; SDValue Op0 = Op.getOperand(0); @@ -9729,7 +9716,6 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { Chain, Dest, CC, Cond); } - // Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets. // Calls to _alloca is needed to probe the stack when allocating more than 4k // bytes in one go. Touching the stack at 4K increments is necessary to ensure @@ -10866,7 +10852,6 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); - MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), MachineMemOperand::MOStore, 2, 2); @@ -10899,7 +10884,6 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, DAG.getConstant(1, MVT::i16)), DAG.getConstant(3, MVT::i16)); - return DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal); } @@ -11452,7 +11436,6 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, } } - static SDValue LowerMEMBARRIER(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); @@ -11537,7 +11520,6 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget, return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); } - static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { EVT T = Op.getValueType(); @@ -12190,7 +12172,6 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, return true; } - bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; @@ -14388,7 +14369,6 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, return EltsFromConsecutiveLoads(VT, Elts, dl, DAG); } - /// PerformTruncateCombine - Converts truncate operation to /// a sequence of vector shuffle operations. 
/// It is possible when we truncate 256-bit vector to 128-bit vector @@ -15118,7 +15098,6 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, if (unsigned Op = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget)) return DAG.getNode(Op, DL, N->getValueType(0), LHS, RHS); - // If we know that this node is legal then we know that it is going to be // matched by one of the SSE/AVX BLEND instructions. These instructions only // depend on the highest bit in each word. Try to use SimplifyDemandedBits @@ -15416,7 +15395,6 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } - /// PerformMulCombine - Optimize a single multiply with constant into two /// in order to implement it with two cheaper instructions, e.g. /// LEA + SHL, LEA + LEA. @@ -15505,7 +15483,6 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { } } - // Hardware support for vector shifts is sparse which makes us scalarize the // vector operations in many cases. Also, on sandybridge ADD is faster than // shl. @@ -15649,7 +15626,6 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, } } - // CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ..)) // where both setccs reference the same FP CMP, and rewrite for CMPEQSS // and friends. Likewise for OR -> CMPNEQSS. @@ -16334,7 +16310,6 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, Chains.size()); } - // Turn load->store of MMX types into GPR load/stores. This avoids clobbering // the FP state in cases where an emms may be missing. // A preferable solution to the general problem is to figure out the right @@ -16640,7 +16615,6 @@ static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) { N->getOperand(0), N->getOperand(1)); } - /// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes. static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) { // FAND(0.0, x) -> 0.0 @@ -17378,8 +17352,6 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { return false; } - - /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. X86TargetLowering::ConstraintType -- cgit v1.2.3-18-g5258 From ace0c2fad7c581367cc2519e1d773bca37fc9fec Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Mon, 24 Dec 2012 09:40:33 +0000 Subject: Some x86 instructions can load/store one of the operands to memory. On SSE, this memory needs to be aligned. When these instructions are encoded in VEX (on AVX) there is no such requirement. This changes the folding tables and removes the alignment restrictions from VEX-encoded instructions. 
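As a minimal sketch of the mechanism involved (hypothetical names; the real tables and their TB_ALIGN_* flags live in X86InstrInfo.cpp, as the diff below shows), a folding-table entry whose alignment field is zero permits the register-to-memory fold regardless of how the operand is aligned:

    struct FoldTableEntry {
      unsigned RegOpc;   // register form, e.g. X86::VADDPDrr
      unsigned MemOpc;   // memory form, e.g. X86::VADDPDrm
      unsigned AlignReq; // hypothetical: required alignment in bytes, 0 = none
    };

    // Fold a load into the instruction only when the memory operand is
    // sufficiently aligned; VEX entries now carry 0, so they always fold.
    static bool canFoldLoad(const FoldTableEntry &E, unsigned OperandAlign) {
      return E.AlignReq == 0 || OperandAlign >= E.AlignReq;
    }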
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171024 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 520 ++++++++++++++++++++-------------------- 1 file changed, 260 insertions(+), 260 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index fbfbc29a71..876a63f87b 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -854,31 +854,31 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 }, { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 }, { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 }, - { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, TB_ALIGN_16 }, - { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, TB_ALIGN_16 }, + { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, 0 }, + { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 }, { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 }, { X86::VSQRTSDr, X86::VSQRTSDm, 0 }, { X86::VSQRTSSr, X86::VSQRTSSm, 0 }, - { X86::VADDPDrr, X86::VADDPDrm, TB_ALIGN_16 }, - { X86::VADDPSrr, X86::VADDPSrm, TB_ALIGN_16 }, + { X86::VADDPDrr, X86::VADDPDrm, 0 }, + { X86::VADDPSrr, X86::VADDPSrm, 0 }, { X86::VADDSDrr, X86::VADDSDrm, 0 }, { X86::VADDSSrr, X86::VADDSSrm, 0 }, - { X86::VADDSUBPDrr, X86::VADDSUBPDrm, TB_ALIGN_16 }, - { X86::VADDSUBPSrr, X86::VADDSUBPSrm, TB_ALIGN_16 }, - { X86::VANDNPDrr, X86::VANDNPDrm, TB_ALIGN_16 }, - { X86::VANDNPSrr, X86::VANDNPSrm, TB_ALIGN_16 }, - { X86::VANDPDrr, X86::VANDPDrm, TB_ALIGN_16 }, - { X86::VANDPSrr, X86::VANDPSrm, TB_ALIGN_16 }, - { X86::VBLENDPDrri, X86::VBLENDPDrmi, TB_ALIGN_16 }, - { X86::VBLENDPSrri, X86::VBLENDPSrmi, TB_ALIGN_16 }, - { X86::VBLENDVPDrr, X86::VBLENDVPDrm, TB_ALIGN_16 }, - { X86::VBLENDVPSrr, X86::VBLENDVPSrm, TB_ALIGN_16 }, - { X86::VCMPPDrri, X86::VCMPPDrmi, TB_ALIGN_16 }, - { X86::VCMPPSrri, X86::VCMPPSrmi, TB_ALIGN_16 }, + { X86::VADDSUBPDrr, X86::VADDSUBPDrm, 0 }, + { X86::VADDSUBPSrr, X86::VADDSUBPSrm, 0 }, + { X86::VANDNPDrr, X86::VANDNPDrm, 0 }, + { X86::VANDNPSrr, X86::VANDNPSrm, 0 }, + { X86::VANDPDrr, X86::VANDPDrm, 0 }, + { X86::VANDPSrr, X86::VANDPSrm, 0 }, + { X86::VBLENDPDrri, X86::VBLENDPDrmi, 0 }, + { X86::VBLENDPSrri, X86::VBLENDPSrmi, 0 }, + { X86::VBLENDVPDrr, X86::VBLENDVPDrm, 0 }, + { X86::VBLENDVPSrr, X86::VBLENDVPSrm, 0 }, + { X86::VCMPPDrri, X86::VCMPPDrmi, 0 }, + { X86::VCMPPSrri, X86::VCMPPSrmi, 0 }, { X86::VCMPSDrr, X86::VCMPSDrm, 0 }, { X86::VCMPSSrr, X86::VCMPSSrm, 0 }, - { X86::VDIVPDrr, X86::VDIVPDrm, TB_ALIGN_16 }, - { X86::VDIVPSrr, X86::VDIVPSrm, TB_ALIGN_16 }, + { X86::VDIVPDrr, X86::VDIVPDrm, 0 }, + { X86::VDIVPSrr, X86::VDIVPSrm, 0 }, { X86::VDIVSDrr, X86::VDIVSDrm, 0 }, { X86::VDIVSSrr, X86::VDIVSSrm, 0 }, { X86::VFsANDNPDrr, X86::VFsANDNPDrm, TB_ALIGN_16 }, @@ -916,252 +916,252 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VMULPSrr, X86::VMULPSrm, TB_ALIGN_16 }, { X86::VMULSDrr, X86::VMULSDrm, 0 }, { X86::VMULSSrr, X86::VMULSSrm, 0 }, - { X86::VORPDrr, X86::VORPDrm, TB_ALIGN_16 }, - { X86::VORPSrr, X86::VORPSrm, TB_ALIGN_16 }, - { X86::VPACKSSDWrr, X86::VPACKSSDWrm, TB_ALIGN_16 }, - { X86::VPACKSSWBrr, X86::VPACKSSWBrm, TB_ALIGN_16 }, - { X86::VPACKUSDWrr, X86::VPACKUSDWrm, TB_ALIGN_16 }, - { X86::VPACKUSWBrr, X86::VPACKUSWBrm, TB_ALIGN_16 }, - { X86::VPADDBrr, X86::VPADDBrm, TB_ALIGN_16 }, - { X86::VPADDDrr, X86::VPADDDrm, TB_ALIGN_16 }, - { X86::VPADDQrr, X86::VPADDQrm, TB_ALIGN_16 }, - { X86::VPADDSBrr, X86::VPADDSBrm, TB_ALIGN_16 }, - { X86::VPADDSWrr, X86::VPADDSWrm, TB_ALIGN_16 }, - { X86::VPADDUSBrr, 
X86::VPADDUSBrm, TB_ALIGN_16 }, - { X86::VPADDUSWrr, X86::VPADDUSWrm, TB_ALIGN_16 }, - { X86::VPADDWrr, X86::VPADDWrm, TB_ALIGN_16 }, - { X86::VPALIGNR128rr, X86::VPALIGNR128rm, TB_ALIGN_16 }, - { X86::VPANDNrr, X86::VPANDNrm, TB_ALIGN_16 }, - { X86::VPANDrr, X86::VPANDrm, TB_ALIGN_16 }, - { X86::VPAVGBrr, X86::VPAVGBrm, TB_ALIGN_16 }, - { X86::VPAVGWrr, X86::VPAVGWrm, TB_ALIGN_16 }, - { X86::VPBLENDWrri, X86::VPBLENDWrmi, TB_ALIGN_16 }, - { X86::VPCMPEQBrr, X86::VPCMPEQBrm, TB_ALIGN_16 }, - { X86::VPCMPEQDrr, X86::VPCMPEQDrm, TB_ALIGN_16 }, - { X86::VPCMPEQQrr, X86::VPCMPEQQrm, TB_ALIGN_16 }, - { X86::VPCMPEQWrr, X86::VPCMPEQWrm, TB_ALIGN_16 }, - { X86::VPCMPGTBrr, X86::VPCMPGTBrm, TB_ALIGN_16 }, - { X86::VPCMPGTDrr, X86::VPCMPGTDrm, TB_ALIGN_16 }, - { X86::VPCMPGTQrr, X86::VPCMPGTQrm, TB_ALIGN_16 }, - { X86::VPCMPGTWrr, X86::VPCMPGTWrm, TB_ALIGN_16 }, - { X86::VPHADDDrr, X86::VPHADDDrm, TB_ALIGN_16 }, - { X86::VPHADDSWrr128, X86::VPHADDSWrm128, TB_ALIGN_16 }, - { X86::VPHADDWrr, X86::VPHADDWrm, TB_ALIGN_16 }, - { X86::VPHSUBDrr, X86::VPHSUBDrm, TB_ALIGN_16 }, - { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, TB_ALIGN_16 }, - { X86::VPHSUBWrr, X86::VPHSUBWrm, TB_ALIGN_16 }, - { X86::VPERMILPDrr, X86::VPERMILPDrm, TB_ALIGN_16 }, - { X86::VPERMILPSrr, X86::VPERMILPSrm, TB_ALIGN_16 }, - { X86::VPINSRWrri, X86::VPINSRWrmi, TB_ALIGN_16 }, - { X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, TB_ALIGN_16 }, - { X86::VPMADDWDrr, X86::VPMADDWDrm, TB_ALIGN_16 }, - { X86::VPMAXSWrr, X86::VPMAXSWrm, TB_ALIGN_16 }, - { X86::VPMAXUBrr, X86::VPMAXUBrm, TB_ALIGN_16 }, - { X86::VPMINSWrr, X86::VPMINSWrm, TB_ALIGN_16 }, - { X86::VPMINUBrr, X86::VPMINUBrm, TB_ALIGN_16 }, - { X86::VPMINSBrr, X86::VPMINSBrm, TB_ALIGN_16 }, - { X86::VPMINSDrr, X86::VPMINSDrm, TB_ALIGN_16 }, - { X86::VPMINUDrr, X86::VPMINUDrm, TB_ALIGN_16 }, - { X86::VPMINUWrr, X86::VPMINUWrm, TB_ALIGN_16 }, - { X86::VPMAXSBrr, X86::VPMAXSBrm, TB_ALIGN_16 }, - { X86::VPMAXSDrr, X86::VPMAXSDrm, TB_ALIGN_16 }, - { X86::VPMAXUDrr, X86::VPMAXUDrm, TB_ALIGN_16 }, - { X86::VPMAXUWrr, X86::VPMAXUWrm, TB_ALIGN_16 }, - { X86::VPMULDQrr, X86::VPMULDQrm, TB_ALIGN_16 }, - { X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, TB_ALIGN_16 }, - { X86::VPMULHUWrr, X86::VPMULHUWrm, TB_ALIGN_16 }, - { X86::VPMULHWrr, X86::VPMULHWrm, TB_ALIGN_16 }, - { X86::VPMULLDrr, X86::VPMULLDrm, TB_ALIGN_16 }, - { X86::VPMULLWrr, X86::VPMULLWrm, TB_ALIGN_16 }, - { X86::VPMULUDQrr, X86::VPMULUDQrm, TB_ALIGN_16 }, - { X86::VPORrr, X86::VPORrm, TB_ALIGN_16 }, - { X86::VPSADBWrr, X86::VPSADBWrm, TB_ALIGN_16 }, - { X86::VPSHUFBrr, X86::VPSHUFBrm, TB_ALIGN_16 }, - { X86::VPSIGNBrr, X86::VPSIGNBrm, TB_ALIGN_16 }, - { X86::VPSIGNWrr, X86::VPSIGNWrm, TB_ALIGN_16 }, - { X86::VPSIGNDrr, X86::VPSIGNDrm, TB_ALIGN_16 }, - { X86::VPSLLDrr, X86::VPSLLDrm, TB_ALIGN_16 }, - { X86::VPSLLQrr, X86::VPSLLQrm, TB_ALIGN_16 }, - { X86::VPSLLWrr, X86::VPSLLWrm, TB_ALIGN_16 }, - { X86::VPSRADrr, X86::VPSRADrm, TB_ALIGN_16 }, - { X86::VPSRAWrr, X86::VPSRAWrm, TB_ALIGN_16 }, - { X86::VPSRLDrr, X86::VPSRLDrm, TB_ALIGN_16 }, - { X86::VPSRLQrr, X86::VPSRLQrm, TB_ALIGN_16 }, - { X86::VPSRLWrr, X86::VPSRLWrm, TB_ALIGN_16 }, - { X86::VPSUBBrr, X86::VPSUBBrm, TB_ALIGN_16 }, - { X86::VPSUBDrr, X86::VPSUBDrm, TB_ALIGN_16 }, - { X86::VPSUBSBrr, X86::VPSUBSBrm, TB_ALIGN_16 }, - { X86::VPSUBSWrr, X86::VPSUBSWrm, TB_ALIGN_16 }, - { X86::VPSUBWrr, X86::VPSUBWrm, TB_ALIGN_16 }, - { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, TB_ALIGN_16 }, - { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, TB_ALIGN_16 }, - { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, 
TB_ALIGN_16 }, - { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, TB_ALIGN_16 }, - { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, TB_ALIGN_16 }, - { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, TB_ALIGN_16 }, - { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, TB_ALIGN_16 }, - { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, TB_ALIGN_16 }, - { X86::VPXORrr, X86::VPXORrm, TB_ALIGN_16 }, - { X86::VSHUFPDrri, X86::VSHUFPDrmi, TB_ALIGN_16 }, - { X86::VSHUFPSrri, X86::VSHUFPSrmi, TB_ALIGN_16 }, - { X86::VSUBPDrr, X86::VSUBPDrm, TB_ALIGN_16 }, - { X86::VSUBPSrr, X86::VSUBPSrm, TB_ALIGN_16 }, + { X86::VORPDrr, X86::VORPDrm, 0 }, + { X86::VORPSrr, X86::VORPSrm, 0 }, + { X86::VPACKSSDWrr, X86::VPACKSSDWrm, 0 }, + { X86::VPACKSSWBrr, X86::VPACKSSWBrm, 0 }, + { X86::VPACKUSDWrr, X86::VPACKUSDWrm, 0 }, + { X86::VPACKUSWBrr, X86::VPACKUSWBrm, 0 }, + { X86::VPADDBrr, X86::VPADDBrm, 0 }, + { X86::VPADDDrr, X86::VPADDDrm, 0 }, + { X86::VPADDQrr, X86::VPADDQrm, 0 }, + { X86::VPADDSBrr, X86::VPADDSBrm, 0 }, + { X86::VPADDSWrr, X86::VPADDSWrm, 0 }, + { X86::VPADDUSBrr, X86::VPADDUSBrm, 0 }, + { X86::VPADDUSWrr, X86::VPADDUSWrm, 0 }, + { X86::VPADDWrr, X86::VPADDWrm, 0 }, + { X86::VPALIGNR128rr, X86::VPALIGNR128rm, 0 }, + { X86::VPANDNrr, X86::VPANDNrm, 0 }, + { X86::VPANDrr, X86::VPANDrm, 0 }, + { X86::VPAVGBrr, X86::VPAVGBrm, 0 }, + { X86::VPAVGWrr, X86::VPAVGWrm, 0 }, + { X86::VPBLENDWrri, X86::VPBLENDWrmi, 0 }, + { X86::VPCMPEQBrr, X86::VPCMPEQBrm, 0 }, + { X86::VPCMPEQDrr, X86::VPCMPEQDrm, 0 }, + { X86::VPCMPEQQrr, X86::VPCMPEQQrm, 0 }, + { X86::VPCMPEQWrr, X86::VPCMPEQWrm, 0 }, + { X86::VPCMPGTBrr, X86::VPCMPGTBrm, 0 }, + { X86::VPCMPGTDrr, X86::VPCMPGTDrm, 0 }, + { X86::VPCMPGTQrr, X86::VPCMPGTQrm, 0 }, + { X86::VPCMPGTWrr, X86::VPCMPGTWrm, 0 }, + { X86::VPHADDDrr, X86::VPHADDDrm, 0 }, + { X86::VPHADDSWrr128, X86::VPHADDSWrm128, 0 }, + { X86::VPHADDWrr, X86::VPHADDWrm, 0 }, + { X86::VPHSUBDrr, X86::VPHSUBDrm, 0 }, + { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, 0 }, + { X86::VPHSUBWrr, X86::VPHSUBWrm, 0 }, + { X86::VPERMILPDrr, X86::VPERMILPDrm, 0 }, + { X86::VPERMILPSrr, X86::VPERMILPSrm, 0 }, + { X86::VPINSRWrri, X86::VPINSRWrmi, 0 }, + { X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, 0 }, + { X86::VPMADDWDrr, X86::VPMADDWDrm, 0 }, + { X86::VPMAXSWrr, X86::VPMAXSWrm, 0 }, + { X86::VPMAXUBrr, X86::VPMAXUBrm, 0 }, + { X86::VPMINSWrr, X86::VPMINSWrm, 0 }, + { X86::VPMINUBrr, X86::VPMINUBrm, 0 }, + { X86::VPMINSBrr, X86::VPMINSBrm, 0 }, + { X86::VPMINSDrr, X86::VPMINSDrm, 0 }, + { X86::VPMINUDrr, X86::VPMINUDrm, 0 }, + { X86::VPMINUWrr, X86::VPMINUWrm, 0 }, + { X86::VPMAXSBrr, X86::VPMAXSBrm, 0 }, + { X86::VPMAXSDrr, X86::VPMAXSDrm, 0 }, + { X86::VPMAXUDrr, X86::VPMAXUDrm, 0 }, + { X86::VPMAXUWrr, X86::VPMAXUWrm, 0 }, + { X86::VPMULDQrr, X86::VPMULDQrm, 0 }, + { X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, 0 }, + { X86::VPMULHUWrr, X86::VPMULHUWrm, 0 }, + { X86::VPMULHWrr, X86::VPMULHWrm, 0 }, + { X86::VPMULLDrr, X86::VPMULLDrm, 0 }, + { X86::VPMULLWrr, X86::VPMULLWrm, 0 }, + { X86::VPMULUDQrr, X86::VPMULUDQrm, 0 }, + { X86::VPORrr, X86::VPORrm, 0 }, + { X86::VPSADBWrr, X86::VPSADBWrm, 0 }, + { X86::VPSHUFBrr, X86::VPSHUFBrm, 0 }, + { X86::VPSIGNBrr, X86::VPSIGNBrm, 0 }, + { X86::VPSIGNWrr, X86::VPSIGNWrm, 0 }, + { X86::VPSIGNDrr, X86::VPSIGNDrm, 0 }, + { X86::VPSLLDrr, X86::VPSLLDrm, 0 }, + { X86::VPSLLQrr, X86::VPSLLQrm, 0 }, + { X86::VPSLLWrr, X86::VPSLLWrm, 0 }, + { X86::VPSRADrr, X86::VPSRADrm, 0 }, + { X86::VPSRAWrr, X86::VPSRAWrm, 0 }, + { X86::VPSRLDrr, X86::VPSRLDrm, 0 }, + { X86::VPSRLQrr, X86::VPSRLQrm, 0 }, + { X86::VPSRLWrr, X86::VPSRLWrm, 
0 }, + { X86::VPSUBBrr, X86::VPSUBBrm, 0 }, + { X86::VPSUBDrr, X86::VPSUBDrm, 0 }, + { X86::VPSUBSBrr, X86::VPSUBSBrm, 0 }, + { X86::VPSUBSWrr, X86::VPSUBSWrm, 0 }, + { X86::VPSUBWrr, X86::VPSUBWrm, 0 }, + { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, 0 }, + { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, 0 }, + { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, 0 }, + { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, 0 }, + { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, 0 }, + { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, 0 }, + { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, 0 }, + { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, 0 }, + { X86::VPXORrr, X86::VPXORrm, 0 }, + { X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 }, + { X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 }, + { X86::VSUBPDrr, X86::VSUBPDrm, 0 }, + { X86::VSUBPSrr, X86::VSUBPSrm, 0 }, { X86::VSUBSDrr, X86::VSUBSDrm, 0 }, { X86::VSUBSSrr, X86::VSUBSSrm, 0 }, - { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, TB_ALIGN_16 }, - { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, TB_ALIGN_16 }, - { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, TB_ALIGN_16 }, - { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, TB_ALIGN_16 }, - { X86::VXORPDrr, X86::VXORPDrm, TB_ALIGN_16 }, - { X86::VXORPSrr, X86::VXORPSrm, TB_ALIGN_16 }, + { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, 0 }, + { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, 0 }, + { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, 0 }, + { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, 0 }, + { X86::VXORPDrr, X86::VXORPDrm, 0 }, + { X86::VXORPSrr, X86::VXORPSrm, 0 }, // AVX 256-bit foldable instructions - { X86::VADDPDYrr, X86::VADDPDYrm, TB_ALIGN_32 }, - { X86::VADDPSYrr, X86::VADDPSYrm, TB_ALIGN_32 }, - { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, TB_ALIGN_32 }, - { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, TB_ALIGN_32 }, - { X86::VANDNPDYrr, X86::VANDNPDYrm, TB_ALIGN_32 }, - { X86::VANDNPSYrr, X86::VANDNPSYrm, TB_ALIGN_32 }, - { X86::VANDPDYrr, X86::VANDPDYrm, TB_ALIGN_32 }, - { X86::VANDPSYrr, X86::VANDPSYrm, TB_ALIGN_32 }, - { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, TB_ALIGN_32 }, - { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, TB_ALIGN_32 }, - { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, TB_ALIGN_32 }, - { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, TB_ALIGN_32 }, - { X86::VCMPPDYrri, X86::VCMPPDYrmi, TB_ALIGN_32 }, - { X86::VCMPPSYrri, X86::VCMPPSYrmi, TB_ALIGN_32 }, - { X86::VDIVPDYrr, X86::VDIVPDYrm, TB_ALIGN_32 }, - { X86::VDIVPSYrr, X86::VDIVPSYrm, TB_ALIGN_32 }, - { X86::VHADDPDYrr, X86::VHADDPDYrm, TB_ALIGN_32 }, - { X86::VHADDPSYrr, X86::VHADDPSYrm, TB_ALIGN_32 }, - { X86::VHSUBPDYrr, X86::VHSUBPDYrm, TB_ALIGN_32 }, - { X86::VHSUBPSYrr, X86::VHSUBPSYrm, TB_ALIGN_32 }, - { X86::VINSERTF128rr, X86::VINSERTF128rm, TB_ALIGN_32 }, - { X86::VMAXPDYrr, X86::VMAXPDYrm, TB_ALIGN_32 }, - { X86::VMAXPDYrr_Int, X86::VMAXPDYrm_Int, TB_ALIGN_32 }, - { X86::VMAXPSYrr, X86::VMAXPSYrm, TB_ALIGN_32 }, - { X86::VMAXPSYrr_Int, X86::VMAXPSYrm_Int, TB_ALIGN_32 }, - { X86::VMINPDYrr, X86::VMINPDYrm, TB_ALIGN_32 }, - { X86::VMINPDYrr_Int, X86::VMINPDYrm_Int, TB_ALIGN_32 }, - { X86::VMINPSYrr, X86::VMINPSYrm, TB_ALIGN_32 }, - { X86::VMINPSYrr_Int, X86::VMINPSYrm_Int, TB_ALIGN_32 }, - { X86::VMULPDYrr, X86::VMULPDYrm, TB_ALIGN_32 }, - { X86::VMULPSYrr, X86::VMULPSYrm, TB_ALIGN_32 }, - { X86::VORPDYrr, X86::VORPDYrm, TB_ALIGN_32 }, - { X86::VORPSYrr, X86::VORPSYrm, TB_ALIGN_32 }, - { X86::VPERM2F128rr, X86::VPERM2F128rm, TB_ALIGN_32 }, - { X86::VPERMILPDYrr, X86::VPERMILPDYrm, TB_ALIGN_32 }, - { X86::VPERMILPSYrr, X86::VPERMILPSYrm, TB_ALIGN_32 }, - { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, TB_ALIGN_32 }, - { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, TB_ALIGN_32 }, - { X86::VSUBPDYrr, 
X86::VSUBPDYrm, TB_ALIGN_32 }, - { X86::VSUBPSYrr, X86::VSUBPSYrm, TB_ALIGN_32 }, - { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, TB_ALIGN_32 }, - { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, TB_ALIGN_32 }, - { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, TB_ALIGN_32 }, - { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, TB_ALIGN_32 }, - { X86::VXORPDYrr, X86::VXORPDYrm, TB_ALIGN_32 }, - { X86::VXORPSYrr, X86::VXORPSYrm, TB_ALIGN_32 }, + { X86::VADDPDYrr, X86::VADDPDYrm, 0 }, + { X86::VADDPSYrr, X86::VADDPSYrm, 0 }, + { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, 0 }, + { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, 0 }, + { X86::VANDNPDYrr, X86::VANDNPDYrm, 0 }, + { X86::VANDNPSYrr, X86::VANDNPSYrm, 0 }, + { X86::VANDPDYrr, X86::VANDPDYrm, 0 }, + { X86::VANDPSYrr, X86::VANDPSYrm, 0 }, + { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, 0 }, + { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, 0 }, + { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, 0 }, + { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, 0 }, + { X86::VCMPPDYrri, X86::VCMPPDYrmi, 0 }, + { X86::VCMPPSYrri, X86::VCMPPSYrmi, 0 }, + { X86::VDIVPDYrr, X86::VDIVPDYrm, 0 }, + { X86::VDIVPSYrr, X86::VDIVPSYrm, 0 }, + { X86::VHADDPDYrr, X86::VHADDPDYrm, 0 }, + { X86::VHADDPSYrr, X86::VHADDPSYrm, 0 }, + { X86::VHSUBPDYrr, X86::VHSUBPDYrm, 0 }, + { X86::VHSUBPSYrr, X86::VHSUBPSYrm, 0 }, + { X86::VINSERTF128rr, X86::VINSERTF128rm, 0 }, + { X86::VMAXPDYrr, X86::VMAXPDYrm, 0 }, + { X86::VMAXPDYrr_Int, X86::VMAXPDYrm_Int, 0 }, + { X86::VMAXPSYrr, X86::VMAXPSYrm, 0 }, + { X86::VMAXPSYrr_Int, X86::VMAXPSYrm_Int, 0 }, + { X86::VMINPDYrr, X86::VMINPDYrm, 0 }, + { X86::VMINPDYrr_Int, X86::VMINPDYrm_Int, 0 }, + { X86::VMINPSYrr, X86::VMINPSYrm, 0 }, + { X86::VMINPSYrr_Int, X86::VMINPSYrm_Int, 0 }, + { X86::VMULPDYrr, X86::VMULPDYrm, 0 }, + { X86::VMULPSYrr, X86::VMULPSYrm, 0 }, + { X86::VORPDYrr, X86::VORPDYrm, 0 }, + { X86::VORPSYrr, X86::VORPSYrm, 0 }, + { X86::VPERM2F128rr, X86::VPERM2F128rm, 0 }, + { X86::VPERMILPDYrr, X86::VPERMILPDYrm, 0 }, + { X86::VPERMILPSYrr, X86::VPERMILPSYrm, 0 }, + { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, 0 }, + { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, 0 }, + { X86::VSUBPDYrr, X86::VSUBPDYrm, 0 }, + { X86::VSUBPSYrr, X86::VSUBPSYrm, 0 }, + { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, 0 }, + { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, 0 }, + { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, 0 }, + { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, 0 }, + { X86::VXORPDYrr, X86::VXORPDYrm, 0 }, + { X86::VXORPSYrr, X86::VXORPSYrm, 0 }, // AVX2 foldable instructions - { X86::VINSERTI128rr, X86::VINSERTI128rm, TB_ALIGN_16 }, - { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, TB_ALIGN_32 }, - { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, TB_ALIGN_32 }, - { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, TB_ALIGN_32 }, - { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, TB_ALIGN_32 }, - { X86::VPADDBYrr, X86::VPADDBYrm, TB_ALIGN_32 }, - { X86::VPADDDYrr, X86::VPADDDYrm, TB_ALIGN_32 }, - { X86::VPADDQYrr, X86::VPADDQYrm, TB_ALIGN_32 }, - { X86::VPADDSBYrr, X86::VPADDSBYrm, TB_ALIGN_32 }, - { X86::VPADDSWYrr, X86::VPADDSWYrm, TB_ALIGN_32 }, - { X86::VPADDUSBYrr, X86::VPADDUSBYrm, TB_ALIGN_32 }, - { X86::VPADDUSWYrr, X86::VPADDUSWYrm, TB_ALIGN_32 }, - { X86::VPADDWYrr, X86::VPADDWYrm, TB_ALIGN_32 }, - { X86::VPALIGNR256rr, X86::VPALIGNR256rm, TB_ALIGN_32 }, - { X86::VPANDNYrr, X86::VPANDNYrm, TB_ALIGN_32 }, - { X86::VPANDYrr, X86::VPANDYrm, TB_ALIGN_32 }, - { X86::VPAVGBYrr, X86::VPAVGBYrm, TB_ALIGN_32 }, - { X86::VPAVGWYrr, X86::VPAVGWYrm, TB_ALIGN_32 }, - { X86::VPBLENDDrri, X86::VPBLENDDrmi, TB_ALIGN_32 }, - { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, TB_ALIGN_32 }, - { 
X86::VPBLENDWYrri, X86::VPBLENDWYrmi, TB_ALIGN_32 }, - { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, TB_ALIGN_32 }, - { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, TB_ALIGN_32 }, - { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, TB_ALIGN_32 }, - { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, TB_ALIGN_32 }, - { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, TB_ALIGN_32 }, - { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, TB_ALIGN_32 }, - { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, TB_ALIGN_32 }, - { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, TB_ALIGN_32 }, - { X86::VPERM2I128rr, X86::VPERM2I128rm, TB_ALIGN_32 }, - { X86::VPERMDYrr, X86::VPERMDYrm, TB_ALIGN_32 }, - { X86::VPERMPDYri, X86::VPERMPDYmi, TB_ALIGN_32 }, - { X86::VPERMPSYrr, X86::VPERMPSYrm, TB_ALIGN_32 }, - { X86::VPERMQYri, X86::VPERMQYmi, TB_ALIGN_32 }, - { X86::VPHADDDYrr, X86::VPHADDDYrm, TB_ALIGN_32 }, - { X86::VPHADDSWrr256, X86::VPHADDSWrm256, TB_ALIGN_32 }, - { X86::VPHADDWYrr, X86::VPHADDWYrm, TB_ALIGN_32 }, - { X86::VPHSUBDYrr, X86::VPHSUBDYrm, TB_ALIGN_32 }, - { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, TB_ALIGN_32 }, - { X86::VPHSUBWYrr, X86::VPHSUBWYrm, TB_ALIGN_32 }, - { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, TB_ALIGN_32 }, - { X86::VPMADDWDYrr, X86::VPMADDWDYrm, TB_ALIGN_32 }, - { X86::VPMAXSWYrr, X86::VPMAXSWYrm, TB_ALIGN_32 }, - { X86::VPMAXUBYrr, X86::VPMAXUBYrm, TB_ALIGN_32 }, - { X86::VPMINSWYrr, X86::VPMINSWYrm, TB_ALIGN_32 }, - { X86::VPMINUBYrr, X86::VPMINUBYrm, TB_ALIGN_32 }, - { X86::VPMINSBYrr, X86::VPMINSBYrm, TB_ALIGN_32 }, - { X86::VPMINSDYrr, X86::VPMINSDYrm, TB_ALIGN_32 }, - { X86::VPMINUDYrr, X86::VPMINUDYrm, TB_ALIGN_32 }, - { X86::VPMINUWYrr, X86::VPMINUWYrm, TB_ALIGN_32 }, - { X86::VPMAXSBYrr, X86::VPMAXSBYrm, TB_ALIGN_32 }, - { X86::VPMAXSDYrr, X86::VPMAXSDYrm, TB_ALIGN_32 }, - { X86::VPMAXUDYrr, X86::VPMAXUDYrm, TB_ALIGN_32 }, - { X86::VPMAXUWYrr, X86::VPMAXUWYrm, TB_ALIGN_32 }, - { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, TB_ALIGN_32 }, - { X86::VPMULDQYrr, X86::VPMULDQYrm, TB_ALIGN_32 }, - { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, TB_ALIGN_32 }, - { X86::VPMULHUWYrr, X86::VPMULHUWYrm, TB_ALIGN_32 }, - { X86::VPMULHWYrr, X86::VPMULHWYrm, TB_ALIGN_32 }, - { X86::VPMULLDYrr, X86::VPMULLDYrm, TB_ALIGN_32 }, - { X86::VPMULLWYrr, X86::VPMULLWYrm, TB_ALIGN_32 }, - { X86::VPMULUDQYrr, X86::VPMULUDQYrm, TB_ALIGN_32 }, - { X86::VPORYrr, X86::VPORYrm, TB_ALIGN_32 }, - { X86::VPSADBWYrr, X86::VPSADBWYrm, TB_ALIGN_32 }, - { X86::VPSHUFBYrr, X86::VPSHUFBYrm, TB_ALIGN_32 }, - { X86::VPSIGNBYrr, X86::VPSIGNBYrm, TB_ALIGN_32 }, - { X86::VPSIGNWYrr, X86::VPSIGNWYrm, TB_ALIGN_32 }, - { X86::VPSIGNDYrr, X86::VPSIGNDYrm, TB_ALIGN_32 }, - { X86::VPSLLDYrr, X86::VPSLLDYrm, TB_ALIGN_16 }, - { X86::VPSLLQYrr, X86::VPSLLQYrm, TB_ALIGN_16 }, - { X86::VPSLLWYrr, X86::VPSLLWYrm, TB_ALIGN_16 }, - { X86::VPSLLVDrr, X86::VPSLLVDrm, TB_ALIGN_16 }, - { X86::VPSLLVDYrr, X86::VPSLLVDYrm, TB_ALIGN_32 }, - { X86::VPSLLVQrr, X86::VPSLLVQrm, TB_ALIGN_16 }, - { X86::VPSLLVQYrr, X86::VPSLLVQYrm, TB_ALIGN_32 }, - { X86::VPSRADYrr, X86::VPSRADYrm, TB_ALIGN_16 }, - { X86::VPSRAWYrr, X86::VPSRAWYrm, TB_ALIGN_16 }, - { X86::VPSRAVDrr, X86::VPSRAVDrm, TB_ALIGN_16 }, - { X86::VPSRAVDYrr, X86::VPSRAVDYrm, TB_ALIGN_32 }, - { X86::VPSRLDYrr, X86::VPSRLDYrm, TB_ALIGN_16 }, - { X86::VPSRLQYrr, X86::VPSRLQYrm, TB_ALIGN_16 }, - { X86::VPSRLWYrr, X86::VPSRLWYrm, TB_ALIGN_16 }, - { X86::VPSRLVDrr, X86::VPSRLVDrm, TB_ALIGN_16 }, - { X86::VPSRLVDYrr, X86::VPSRLVDYrm, TB_ALIGN_32 }, - { X86::VPSRLVQrr, X86::VPSRLVQrm, TB_ALIGN_16 }, - { X86::VPSRLVQYrr, X86::VPSRLVQYrm, TB_ALIGN_32 }, - { X86::VPSUBBYrr, 
X86::VPSUBBYrm, TB_ALIGN_32 }, - { X86::VPSUBDYrr, X86::VPSUBDYrm, TB_ALIGN_32 }, - { X86::VPSUBSBYrr, X86::VPSUBSBYrm, TB_ALIGN_32 }, - { X86::VPSUBSWYrr, X86::VPSUBSWYrm, TB_ALIGN_32 }, - { X86::VPSUBWYrr, X86::VPSUBWYrm, TB_ALIGN_32 }, - { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, TB_ALIGN_32 }, - { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, TB_ALIGN_32 }, - { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, TB_ALIGN_16 }, - { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, TB_ALIGN_32 }, - { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, TB_ALIGN_32 }, - { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, TB_ALIGN_32 }, - { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, TB_ALIGN_32 }, - { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, TB_ALIGN_32 }, - { X86::VPXORYrr, X86::VPXORYrm, TB_ALIGN_32 }, + { X86::VINSERTI128rr, X86::VINSERTI128rm, 0 }, + { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, 0 }, + { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, 0 }, + { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, 0 }, + { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, 0 }, + { X86::VPADDBYrr, X86::VPADDBYrm, 0 }, + { X86::VPADDDYrr, X86::VPADDDYrm, 0 }, + { X86::VPADDQYrr, X86::VPADDQYrm, 0 }, + { X86::VPADDSBYrr, X86::VPADDSBYrm, 0 }, + { X86::VPADDSWYrr, X86::VPADDSWYrm, 0 }, + { X86::VPADDUSBYrr, X86::VPADDUSBYrm, 0 }, + { X86::VPADDUSWYrr, X86::VPADDUSWYrm, 0 }, + { X86::VPADDWYrr, X86::VPADDWYrm, 0 }, + { X86::VPALIGNR256rr, X86::VPALIGNR256rm, 0 }, + { X86::VPANDNYrr, X86::VPANDNYrm, 0 }, + { X86::VPANDYrr, X86::VPANDYrm, 0 }, + { X86::VPAVGBYrr, X86::VPAVGBYrm, 0 }, + { X86::VPAVGWYrr, X86::VPAVGWYrm, 0 }, + { X86::VPBLENDDrri, X86::VPBLENDDrmi, 0 }, + { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, 0 }, + { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, 0 }, + { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, 0 }, + { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, 0 }, + { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, 0 }, + { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, 0 }, + { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, 0 }, + { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, 0 }, + { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, 0 }, + { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, 0 }, + { X86::VPERM2I128rr, X86::VPERM2I128rm, 0 }, + { X86::VPERMDYrr, X86::VPERMDYrm, 0 }, + { X86::VPERMPDYri, X86::VPERMPDYmi, 0 }, + { X86::VPERMPSYrr, X86::VPERMPSYrm, 0 }, + { X86::VPERMQYri, X86::VPERMQYmi, 0 }, + { X86::VPHADDDYrr, X86::VPHADDDYrm, 0 }, + { X86::VPHADDSWrr256, X86::VPHADDSWrm256, 0 }, + { X86::VPHADDWYrr, X86::VPHADDWYrm, 0 }, + { X86::VPHSUBDYrr, X86::VPHSUBDYrm, 0 }, + { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, 0 }, + { X86::VPHSUBWYrr, X86::VPHSUBWYrm, 0 }, + { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, 0 }, + { X86::VPMADDWDYrr, X86::VPMADDWDYrm, 0 }, + { X86::VPMAXSWYrr, X86::VPMAXSWYrm, 0 }, + { X86::VPMAXUBYrr, X86::VPMAXUBYrm, 0 }, + { X86::VPMINSWYrr, X86::VPMINSWYrm, 0 }, + { X86::VPMINUBYrr, X86::VPMINUBYrm, 0 }, + { X86::VPMINSBYrr, X86::VPMINSBYrm, 0 }, + { X86::VPMINSDYrr, X86::VPMINSDYrm, 0 }, + { X86::VPMINUDYrr, X86::VPMINUDYrm, 0 }, + { X86::VPMINUWYrr, X86::VPMINUWYrm, 0 }, + { X86::VPMAXSBYrr, X86::VPMAXSBYrm, 0 }, + { X86::VPMAXSDYrr, X86::VPMAXSDYrm, 0 }, + { X86::VPMAXUDYrr, X86::VPMAXUDYrm, 0 }, + { X86::VPMAXUWYrr, X86::VPMAXUWYrm, 0 }, + { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, 0 }, + { X86::VPMULDQYrr, X86::VPMULDQYrm, 0 }, + { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, 0 }, + { X86::VPMULHUWYrr, X86::VPMULHUWYrm, 0 }, + { X86::VPMULHWYrr, X86::VPMULHWYrm, 0 }, + { X86::VPMULLDYrr, X86::VPMULLDYrm, 0 }, + { X86::VPMULLWYrr, X86::VPMULLWYrm, 0 }, + { X86::VPMULUDQYrr, X86::VPMULUDQYrm, 0 }, + { X86::VPORYrr, X86::VPORYrm, 0 }, + { 
X86::VPSADBWYrr, X86::VPSADBWYrm, 0 }, + { X86::VPSHUFBYrr, X86::VPSHUFBYrm, 0 }, + { X86::VPSIGNBYrr, X86::VPSIGNBYrm, 0 }, + { X86::VPSIGNWYrr, X86::VPSIGNWYrm, 0 }, + { X86::VPSIGNDYrr, X86::VPSIGNDYrm, 0 }, + { X86::VPSLLDYrr, X86::VPSLLDYrm, 0 }, + { X86::VPSLLQYrr, X86::VPSLLQYrm, 0 }, + { X86::VPSLLWYrr, X86::VPSLLWYrm, 0 }, + { X86::VPSLLVDrr, X86::VPSLLVDrm, 0 }, + { X86::VPSLLVDYrr, X86::VPSLLVDYrm, 0 }, + { X86::VPSLLVQrr, X86::VPSLLVQrm, 0 }, + { X86::VPSLLVQYrr, X86::VPSLLVQYrm, 0 }, + { X86::VPSRADYrr, X86::VPSRADYrm, 0 }, + { X86::VPSRAWYrr, X86::VPSRAWYrm, 0 }, + { X86::VPSRAVDrr, X86::VPSRAVDrm, 0 }, + { X86::VPSRAVDYrr, X86::VPSRAVDYrm, 0 }, + { X86::VPSRLDYrr, X86::VPSRLDYrm, 0 }, + { X86::VPSRLQYrr, X86::VPSRLQYrm, 0 }, + { X86::VPSRLWYrr, X86::VPSRLWYrm, 0 }, + { X86::VPSRLVDrr, X86::VPSRLVDrm, 0 }, + { X86::VPSRLVDYrr, X86::VPSRLVDYrm, 0 }, + { X86::VPSRLVQrr, X86::VPSRLVQrm, 0 }, + { X86::VPSRLVQYrr, X86::VPSRLVQYrm, 0 }, + { X86::VPSUBBYrr, X86::VPSUBBYrm, 0 }, + { X86::VPSUBDYrr, X86::VPSUBDYrm, 0 }, + { X86::VPSUBSBYrr, X86::VPSUBSBYrm, 0 }, + { X86::VPSUBSWYrr, X86::VPSUBSWYrm, 0 }, + { X86::VPSUBWYrr, X86::VPSUBWYrm, 0 }, + { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, 0 }, + { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, 0 }, + { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, 0 }, + { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, 0 }, + { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, 0 }, + { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, 0 }, + { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, 0 }, + { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, 0 }, + { X86::VPXORYrr, X86::VPXORYrm, 0 }, // FIXME: add AVX 256-bit foldable instructions // FMA4 foldable patterns -- cgit v1.2.3-18-g5258 From 71f30bffcf158ffac8c75bb67872e275b7f87e47 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Mon, 24 Dec 2012 19:58:45 +0000 Subject: Quiet gcc's -Wparenthesis warning. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171044 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 7eff0468d0..5f7f915097 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -17966,7 +17966,7 @@ X86VectorTargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned AddressSpace) const { // Legalize the type. std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Src); - assert(Opcode == Instruction::Load || Opcode == Instruction::Store && + assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && "Invalid Opcode"); const X86Subtarget &ST = -- cgit v1.2.3-18-g5258 From a4c8a32a9f9ca0db1e7dd95c0a95529403097c1f Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Tue, 25 Dec 2012 03:29:18 +0000 Subject: VCVTSS2SD requires a strict alignment. Thanks Elena.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171049 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 876a63f87b..5192a1c1dd 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -852,8 +852,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm, 0 }, { X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 }, { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 }, - { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 }, - { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 }, + { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, TB_ALIGN_16 }, + { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, TB_ALIGN_16 }, { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, 0 }, { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 }, { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 }, -- cgit v1.2.3-18-g5258 From 382ed78d3fef9f6c582e3cdcfb30f8c6fa3d0d79 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 25 Dec 2012 12:54:19 +0000 Subject: X86: Custom lower <2 x i64> eq and ne when SSE41 is not available. pcmpeqd, pshufd, pshufd, pand is cheaper than unpack + cmpq, sbbq, cmpq, sbbq + pack. Small speedup on loop-vectorized viterbi (-march=core2). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171063 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5f7f915097..a173712b5b 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9171,8 +9171,30 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { if (VT == MVT::v2i64) { if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) return SDValue(); - if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) - return SDValue(); + if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) { + // If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with + // pcmpeqd + 2 shuffles + pand. + assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!"); + + // First cast everything to the right type, + Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0); + Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1); + + // Do the compare. + SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1); + + // Make sure the lower and upper halves are both all-ones. + const int Mask1[] = { 0, 0, 2, 2 }; + SDValue S1 = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask1); + const int Mask2[] = { 1, 1, 3, 3 }; + SDValue S2 = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask2); + Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, S1, S2); + + if (Invert) + Result = DAG.getNOT(dl, Result, MVT::v4i32); + + return DAG.getNode(ISD::BITCAST, dl, VT, Result); + } } // Since SSE has no unsigned integer comparisons, we need to flip the sign -- cgit v1.2.3-18-g5258 From 99f78061e05833e815cb7a27e6c17eadcd028ce2 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 25 Dec 2012 13:09:08 +0000 Subject: X86: Shave off one shuffle from the pcmpeqq sequence for SSE2 by making use of and commutativity. 
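As an illustrative sketch in SSE2 intrinsics (not the DAG-combine code itself): a 64-bit lane is all-ones exactly when both of its 32-bit halves compared equal, and because AND commutes, a single pshufd of the pcmpeqd result combines each half with the other:

    #include <emmintrin.h>

    // Synthesize pcmpeqq (SSE4.1) on plain SSE2: pcmpeqd + pshufd + pand.
    static __m128i cmpeq_epi64_sse2(__m128i a, __m128i b) {
      __m128i eq32 = _mm_cmpeq_epi32(a, b);                            // pcmpeqd
      __m128i swap = _mm_shuffle_epi32(eq32, _MM_SHUFFLE(2, 3, 0, 1)); // mask {1,0,3,2}
      return _mm_and_si128(eq32, swap);                                // pand
    }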
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171064 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a173712b5b..ad86c99fd2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9173,7 +9173,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { return SDValue(); if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) { // If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with - // pcmpeqd + 2 shuffles + pand. + // pcmpeqd + pshufd + pand. assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!"); // First cast everything to the right type, @@ -9184,11 +9184,9 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1); // Make sure the lower and upper halves are both all-ones. - const int Mask1[] = { 0, 0, 2, 2 }; - SDValue S1 = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask1); - const int Mask2[] = { 1, 1, 3, 3 }; - SDValue S2 = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask2); - Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, S1, S2); + const int Mask[] = { 1, 0, 3, 2 }; + SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask); + Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf); if (Invert) Result = DAG.getNOT(dl, Result, MVT::v4i32); -- cgit v1.2.3-18-g5258 From 0f77910e6fb2c7fccc9643df8b6859c742d678a2 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 26 Dec 2012 00:35:47 +0000 Subject: Remove alignment requirement from VCVTSS2SD in folding tables. Reverting r171049. This instruction doesn't require alignment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171078 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 5192a1c1dd..876a63f87b 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -852,8 +852,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm, 0 }, { X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 }, { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 }, - { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, TB_ALIGN_16 }, - { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, TB_ALIGN_16 }, + { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 }, + { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 }, { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, 0 }, { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 }, { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 }, -- cgit v1.2.3-18-g5258 From 1ac0046fa83142724c94c8edd46584d638fc141a Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 26 Dec 2012 01:47:12 +0000 Subject: Remove alignment requirements from (V)EXTRACTPS. This instruction does 32-bit stores which aren't required to be aligned on SSE or AVX. 
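For context, a small sketch using the SSE4.1 intrinsic: extractps writes a single 32-bit lane, so the destination needs at most the natural 4-byte alignment, never 16 bytes:

    #include <smmintrin.h>

    // Typically lowers to a single extractps store; p need not be
    // 16-byte aligned.
    static void store_lane2(int *p, __m128 v) {
      *p = _mm_extract_ps(v, 2); // bit pattern of lane 2
    }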
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171080 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 876a63f87b..7174353545 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -297,7 +297,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::DIV32r, X86::DIV32m, TB_FOLDED_LOAD }, { X86::DIV64r, X86::DIV64m, TB_FOLDED_LOAD }, { X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD }, - { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE }, { X86::FsMOVAPDrr, X86::MOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE }, { X86::FsMOVAPSrr, X86::MOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE }, { X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD }, @@ -355,7 +355,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD }, { X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD }, // AVX 128-bit versions of foldable instructions - { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE }, { X86::FsVMOVAPDrr, X86::VMOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE }, { X86::FsVMOVAPSrr, X86::VMOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE }, { X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, -- cgit v1.2.3-18-g5258 From d83a73adf0cd8181eb43f2e5116e53a508e126db Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 26 Dec 2012 02:14:19 +0000 Subject: Remove alignment from folding table for VMOVUPD as an unaligned instruction it shouldn't require alignment... git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171081 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 7174353545..3d2f7e8a0f 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -510,7 +510,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 }, { X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, TB_ALIGN_16 }, { X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, TB_ALIGN_16 }, - { X86::VMOVUPDrr, X86::VMOVUPDrm, TB_ALIGN_16 }, + { X86::VMOVUPDrr, X86::VMOVUPDrm, 0 }, { X86::VMOVUPSrr, X86::VMOVUPSrm, 0 }, { X86::VMOVZDI2PDIrr, X86::VMOVZDI2PDIrm, 0 }, { X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 }, -- cgit v1.2.3-18-g5258 From 174a3d3e63734aced494b0f725fd4d0bf1fa3491 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 26 Dec 2012 02:44:47 +0000 Subject: Remove alignment from a bunch more VEX encoded operations in the folding tables. 
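A sketch of why dropping the requirement is profitable: with AVX enabled, an unaligned load can be folded straight into the VEX-encoded arithmetic instruction, whereas the legacy SSE encoding faults on a misaligned packed memory operand (illustrative; actual codegen may vary):

    #include <immintrin.h>

    // Under AVX this can become a single vaddps xmm, xmm, [mem] even when p
    // is unaligned; legacy SSE needs a separate movups plus a register addps.
    static __m128 add_from_mem(__m128 a, const float *p) {
      return _mm_add_ps(a, _mm_loadu_ps(p));
    }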
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171082 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 94 ++++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 47 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 3d2f7e8a0f..bb81fa163f 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -515,22 +515,22 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VMOVZDI2PDIrr, X86::VMOVZDI2PDIrm, 0 }, { X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 }, { X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 }, - { X86::VPABSBrr128, X86::VPABSBrm128, TB_ALIGN_16 }, - { X86::VPABSDrr128, X86::VPABSDrm128, TB_ALIGN_16 }, - { X86::VPABSWrr128, X86::VPABSWrm128, TB_ALIGN_16 }, - { X86::VPERMILPDri, X86::VPERMILPDmi, TB_ALIGN_16 }, - { X86::VPERMILPSri, X86::VPERMILPSmi, TB_ALIGN_16 }, - { X86::VPSHUFDri, X86::VPSHUFDmi, TB_ALIGN_16 }, - { X86::VPSHUFHWri, X86::VPSHUFHWmi, TB_ALIGN_16 }, - { X86::VPSHUFLWri, X86::VPSHUFLWmi, TB_ALIGN_16 }, - { X86::VRCPPSr, X86::VRCPPSm, TB_ALIGN_16 }, - { X86::VRCPPSr_Int, X86::VRCPPSm_Int, TB_ALIGN_16 }, - { X86::VRSQRTPSr, X86::VRSQRTPSm, TB_ALIGN_16 }, - { X86::VRSQRTPSr_Int, X86::VRSQRTPSm_Int, TB_ALIGN_16 }, - { X86::VSQRTPDr, X86::VSQRTPDm, TB_ALIGN_16 }, - { X86::VSQRTPDr_Int, X86::VSQRTPDm_Int, TB_ALIGN_16 }, - { X86::VSQRTPSr, X86::VSQRTPSm, TB_ALIGN_16 }, - { X86::VSQRTPSr_Int, X86::VSQRTPSm_Int, TB_ALIGN_16 }, + { X86::VPABSBrr128, X86::VPABSBrm128, 0 }, + { X86::VPABSDrr128, X86::VPABSDrm128, 0 }, + { X86::VPABSWrr128, X86::VPABSWrm128, 0 }, + { X86::VPERMILPDri, X86::VPERMILPDmi, 0 }, + { X86::VPERMILPSri, X86::VPERMILPSmi, 0 }, + { X86::VPSHUFDri, X86::VPSHUFDmi, 0 }, + { X86::VPSHUFHWri, X86::VPSHUFHWmi, 0 }, + { X86::VPSHUFLWri, X86::VPSHUFLWmi, 0 }, + { X86::VRCPPSr, X86::VRCPPSm, 0 }, + { X86::VRCPPSr_Int, X86::VRCPPSm_Int, 0 }, + { X86::VRSQRTPSr, X86::VRSQRTPSm, 0 }, + { X86::VRSQRTPSr_Int, X86::VRSQRTPSm_Int, 0 }, + { X86::VSQRTPDr, X86::VSQRTPDm, 0 }, + { X86::VSQRTPDr_Int, X86::VSQRTPDm_Int, 0 }, + { X86::VSQRTPSr, X86::VSQRTPSm, 0 }, + { X86::VSQRTPSr_Int, X86::VSQRTPSm_Int, 0 }, { X86::VUCOMISDrr, X86::VUCOMISDrm, 0 }, { X86::VUCOMISSrr, X86::VUCOMISSrm, 0 }, { X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE }, @@ -541,24 +541,24 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 }, { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 }, { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 }, - { X86::VPERMILPDYri, X86::VPERMILPDYmi, TB_ALIGN_32 }, - { X86::VPERMILPSYri, X86::VPERMILPSYmi, TB_ALIGN_32 }, + { X86::VPERMILPDYri, X86::VPERMILPDYmi, 0 }, + { X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 }, // AVX2 foldable instructions - { X86::VPABSBrr256, X86::VPABSBrm256, TB_ALIGN_32 }, - { X86::VPABSDrr256, X86::VPABSDrm256, TB_ALIGN_32 }, - { X86::VPABSWrr256, X86::VPABSWrm256, TB_ALIGN_32 }, - { X86::VPSHUFDYri, X86::VPSHUFDYmi, TB_ALIGN_32 }, - { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, TB_ALIGN_32 }, - { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, TB_ALIGN_32 }, - { X86::VRCPPSYr, X86::VRCPPSYm, TB_ALIGN_32 }, - { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, TB_ALIGN_32 }, - { X86::VRSQRTPSYr, X86::VRSQRTPSYm, TB_ALIGN_32 }, - { X86::VRSQRTPSYr_Int, X86::VRSQRTPSYm_Int, TB_ALIGN_32 }, - { X86::VSQRTPDYr, X86::VSQRTPDYm, TB_ALIGN_32 }, - { X86::VSQRTPDYr_Int, X86::VSQRTPDYm_Int, TB_ALIGN_32 }, - { X86::VSQRTPSYr, X86::VSQRTPSYm, TB_ALIGN_32 }, - { X86::VSQRTPSYr_Int, 
X86::VSQRTPSYm_Int, TB_ALIGN_32 }, + { X86::VPABSBrr256, X86::VPABSBrm256, 0 }, + { X86::VPABSDrr256, X86::VPABSDrm256, 0 }, + { X86::VPABSWrr256, X86::VPABSWrm256, 0 }, + { X86::VPSHUFDYri, X86::VPSHUFDYmi, 0 }, + { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, 0 }, + { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, 0 }, + { X86::VRCPPSYr, X86::VRCPPSYm, 0 }, + { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, 0 }, + { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 }, + { X86::VRSQRTPSYr_Int, X86::VRSQRTPSYm_Int, 0 }, + { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 }, + { X86::VSQRTPDYr_Int, X86::VSQRTPDYm_Int, 0 }, + { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 }, + { X86::VSQRTPSYr_Int, X86::VSQRTPSYm_Int, 0 }, { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE }, { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE }, @@ -889,31 +889,31 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFsORPSrr, X86::VFsORPSrm, TB_ALIGN_16 }, { X86::VFsXORPDrr, X86::VFsXORPDrm, TB_ALIGN_16 }, { X86::VFsXORPSrr, X86::VFsXORPSrm, TB_ALIGN_16 }, - { X86::VHADDPDrr, X86::VHADDPDrm, TB_ALIGN_16 }, - { X86::VHADDPSrr, X86::VHADDPSrm, TB_ALIGN_16 }, - { X86::VHSUBPDrr, X86::VHSUBPDrm, TB_ALIGN_16 }, - { X86::VHSUBPSrr, X86::VHSUBPSrm, TB_ALIGN_16 }, + { X86::VHADDPDrr, X86::VHADDPDrm, 0 }, + { X86::VHADDPSrr, X86::VHADDPSrm, 0 }, + { X86::VHSUBPDrr, X86::VHSUBPDrm, 0 }, + { X86::VHSUBPSrr, X86::VHSUBPSrm, 0 }, { X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, 0 }, { X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, 0 }, - { X86::VMAXPDrr, X86::VMAXPDrm, TB_ALIGN_16 }, - { X86::VMAXPDrr_Int, X86::VMAXPDrm_Int, TB_ALIGN_16 }, - { X86::VMAXPSrr, X86::VMAXPSrm, TB_ALIGN_16 }, - { X86::VMAXPSrr_Int, X86::VMAXPSrm_Int, TB_ALIGN_16 }, + { X86::VMAXPDrr, X86::VMAXPDrm, 0 }, + { X86::VMAXPDrr_Int, X86::VMAXPDrm_Int, 0 }, + { X86::VMAXPSrr, X86::VMAXPSrm, 0 }, + { X86::VMAXPSrr_Int, X86::VMAXPSrm_Int, 0 }, { X86::VMAXSDrr, X86::VMAXSDrm, 0 }, { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, 0 }, { X86::VMAXSSrr, X86::VMAXSSrm, 0 }, { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, 0 }, - { X86::VMINPDrr, X86::VMINPDrm, TB_ALIGN_16 }, - { X86::VMINPDrr_Int, X86::VMINPDrm_Int, TB_ALIGN_16 }, - { X86::VMINPSrr, X86::VMINPSrm, TB_ALIGN_16 }, - { X86::VMINPSrr_Int, X86::VMINPSrm_Int, TB_ALIGN_16 }, + { X86::VMINPDrr, X86::VMINPDrm, 0 }, + { X86::VMINPDrr_Int, X86::VMINPDrm_Int, 0 }, + { X86::VMINPSrr, X86::VMINPSrm, 0 }, + { X86::VMINPSrr_Int, X86::VMINPSrm_Int, 0 }, { X86::VMINSDrr, X86::VMINSDrm, 0 }, { X86::VMINSDrr_Int, X86::VMINSDrm_Int, 0 }, { X86::VMINSSrr, X86::VMINSSrm, 0 }, { X86::VMINSSrr_Int, X86::VMINSSrm_Int, 0 }, - { X86::VMPSADBWrri, X86::VMPSADBWrmi, TB_ALIGN_16 }, - { X86::VMULPDrr, X86::VMULPDrm, TB_ALIGN_16 }, - { X86::VMULPSrr, X86::VMULPSrm, TB_ALIGN_16 }, + { X86::VMPSADBWrri, X86::VMPSADBWrmi, 0 }, + { X86::VMULPDrr, X86::VMULPDrm, 0 }, + { X86::VMULPSrr, X86::VMULPSrm, 0 }, { X86::VMULSDrr, X86::VMULSDrm, 0 }, { X86::VMULSSrr, X86::VMULSSrm, 0 }, { X86::VORPDrr, X86::VORPDrm, 0 }, -- cgit v1.2.3-18-g5258 From b5c590a5861b1a38e836092a5137307973480b05 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 26 Dec 2012 03:48:10 +0000 Subject: Mark VANDNPD/VANDNPDS as not commutable. 
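AND/OR/XOR are symmetric, but ANDN computes (~a & b): swapping its sources changes the result, so isCommutable must stay 0 or the compiler would be free to exchange them. A scalar illustration in plain C++ (not target code):

  #include <cassert>
  #include <cstdint>

  uint32_t andn(uint32_t a, uint32_t b) { return ~a & b; }

  int main() {
    assert(andn(0x0Fu, 0xFFu) == 0xF0u); // ~0x0F & 0xFF
    assert(andn(0xFFu, 0x0Fu) == 0x00u); // operands swapped: different result
    return 0;
  }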
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171085 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 69e2b4390f..330f05d34f 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2812,7 +2812,8 @@ multiclass sse12_fp_packed_logical_y opc, string OpcodeStr, defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>; defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>; defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>; -defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>; +let isCommutable = 0 in + defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>; defm AND : sse12_fp_packed_logical<0x54, "and", and>; defm OR : sse12_fp_packed_logical<0x56, "or", or>; -- cgit v1.2.3-18-g5258 From 1fe132ae7d3cc06726c4bf42b8b360b3b8b60280 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 26 Dec 2012 03:56:47 +0000 Subject: Merge an AVX/SSE 256-bit and 128-bit multiclass. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171086 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 41 +++++++++++++++-------------------------- 1 file changed, 15 insertions(+), 26 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 330f05d34f..af7a95ca1c 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2757,6 +2757,20 @@ let neverHasSideEffects = 1, Pattern = [], isCommutable = 0 in /// multiclass sse12_fp_packed_logical opc, string OpcodeStr, SDNode OpNode> { + defm V#NAME#PSY : sse12_fp_packed_logical_rm, TB, VEX_4V, VEX_L; + + defm V#NAME#PDY : sse12_fp_packed_logical_rm, + TB, OpSize, VEX_4V, VEX_L; + // In AVX no need to add a pattern for 128-bit logical rr ps, because they // are all promoted to v2i64, and the patterns are covered by the int // version. 
This is needed in SSE only, because v2i64 isn't supported on @@ -2773,6 +2787,7 @@ multiclass sse12_fp_packed_logical opc, string OpcodeStr, [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), (memopv2i64 addr:$src2)))], 0>, TB, OpSize, VEX_4V; + let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed_logical_rm opc, string OpcodeStr, } } -/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms -/// -multiclass sse12_fp_packed_logical_y opc, string OpcodeStr, - SDNode OpNode> { - defm PSY : sse12_fp_packed_logical_rm, TB, VEX_4V, VEX_L; - - defm PDY : sse12_fp_packed_logical_rm, - TB, OpSize, VEX_4V, VEX_L; -} - -// AVX 256-bit packed logical ops forms -defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>; -defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>; -defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>; -let isCommutable = 0 in - defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>; - defm AND : sse12_fp_packed_logical<0x54, "and", and>; defm OR : sse12_fp_packed_logical<0x56, "or", or>; defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>; -- cgit v1.2.3-18-g5258 From 09a326d3f0fba1b5c519e7e6a45b978ced0c1cc4 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 26 Dec 2012 04:36:03 +0000 Subject: Use an additional multiclass to merge the 128/256-bit SSE/AVX instruction definitions for PAND/POR/PXOR/PANDN git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171087 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 48 ++++++++++++++++--------------------------- 1 file changed, 18 insertions(+), 30 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index af7a95ca1c..9c1166a1b7 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2679,40 +2679,28 @@ multiclass PDI_binop_rm opc, string OpcodeStr, SDNode OpNode, } } // ExeDomain = SSEPackedInt -// These are ordered here for pattern ordering requirements with the fp versions +multiclass PDI_binop_all opc, string OpcodeStr, SDNode Opcode, + OpndItins itins, bit IsCommutable = 0> { +let Predicates = [HasAVX] in + defm VP#NAME# : PDI_binop_rm, VEX_4V; -let Predicates = [HasAVX] in { -defm VPAND : PDI_binop_rm<0xDB, "vpand", and, v2i64, VR128, memopv2i64, - i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V; -defm VPOR : PDI_binop_rm<0xEB, "vpor" , or, v2i64, VR128, memopv2i64, - i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V; -defm VPXOR : PDI_binop_rm<0xEF, "vpxor", xor, v2i64, VR128, memopv2i64, - i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V; -defm VPANDN : PDI_binop_rm<0xDF, "vpandn", X86andnp, v2i64, VR128, memopv2i64, - i128mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V; +let Constraints = "$src1 = $dst" in + defm P#NAME# : PDI_binop_rm; + +let Predicates = [HasAVX2] in + defm VP#NAME#Y : PDI_binop_rm, VEX_4V, VEX_L; } -let Constraints = "$src1 = $dst" in { -defm PAND : PDI_binop_rm<0xDB, "pand", and, v2i64, VR128, memopv2i64, - i128mem, SSE_BIT_ITINS_P, 1>; -defm POR : PDI_binop_rm<0xEB, "por" , or, v2i64, VR128, memopv2i64, - i128mem, SSE_BIT_ITINS_P, 1>; -defm PXOR : PDI_binop_rm<0xEF, "pxor", xor, v2i64, VR128, memopv2i64, - i128mem, SSE_BIT_ITINS_P, 1>; -defm PANDN : PDI_binop_rm<0xDF, "pandn", X86andnp, v2i64, VR128, memopv2i64, - i128mem, SSE_BIT_ITINS_P, 0>; -} // Constraints = "$src1 = $dst" +// These are ordered here for pattern ordering requirements with the fp versions -let Predicates = [HasAVX2] in { -defm VPANDY : PDI_binop_rm<0xDB, "vpand", and, v4i64, VR256, memopv4i64, - i256mem, 
SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPORY : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64, - i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64, - i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPANDNY : PDI_binop_rm<0xDF, "vpandn", X86andnp, v4i64, VR256, memopv4i64, - i256mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V, VEX_L; -} +defm AND : PDI_binop_all<0xDB, "pand", and, SSE_BIT_ITINS_P, 1>; +defm OR : PDI_binop_all<0xEB, "por", or, SSE_BIT_ITINS_P, 1>; +defm XOR : PDI_binop_all<0xEF, "pxor", xor, SSE_BIT_ITINS_P, 1>; +defm ANDN : PDI_binop_all<0xDF, "pandn", X86andnp, SSE_BIT_ITINS_P, 0>; //===----------------------------------------------------------------------===// // SSE 1 & 2 - Logical Instructions -- cgit v1.2.3-18-g5258 From a05f7cbbded2a2605acfd3cd05211e6006409da0 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 26 Dec 2012 04:59:20 +0000 Subject: Reformat the docs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171091 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ad86c99fd2..31e69514fb 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -14408,51 +14408,42 @@ static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, if ((VT == MVT::v4i32) && (OpVT == MVT::v4i64)) { + // On AVX2, v4i64 -> v4i32 becomes VPERMD. if (Subtarget->hasInt256()) { - // AVX2: v4i64 -> v4i32 - - // VPERMD static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1}; - Op = DAG.getNode(ISD::BITCAST, dl, MVT::v8i32, Op); Op = DAG.getVectorShuffle(MVT::v8i32, dl, Op, DAG.getUNDEF(MVT::v8i32), ShufMask); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Op, DAG.getIntPtrConstant(0)); } - // AVX: v4i64 -> v4i32 + // On AVX, v4i64 -> v4i32 becomes a sequence that uses PSHUFD and MOVLHPS. SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op, DAG.getIntPtrConstant(0)); - SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op, DAG.getIntPtrConstant(2)); OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo); OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi); - // PSHUFD + // The PSHUFD mask: static const int ShufMask1[] = {0, 2, 0, 0}; - SDValue Undef = DAG.getUNDEF(VT); OpLo = DAG.getVectorShuffle(VT, dl, OpLo, Undef, ShufMask1); OpHi = DAG.getVectorShuffle(VT, dl, OpHi, Undef, ShufMask1); - // MOVLHPS + // The MOVLHPS mask: static const int ShufMask2[] = {0, 1, 4, 5}; - return DAG.getVectorShuffle(VT, dl, OpLo, OpHi, ShufMask2); } if ((VT == MVT::v8i16) && (OpVT == MVT::v8i32)) { + // On AVX2, v8i32 -> v8i16 becomed PSHUFB. 
if (Subtarget->hasInt256()) { - // AVX2: v8i32 -> v8i16 - Op = DAG.getNode(ISD::BITCAST, dl, MVT::v32i8, Op); - // PSHUFB SmallVector pshufbMask; for (unsigned i = 0; i < 2; ++i) { pshufbMask.push_back(DAG.getConstant(0x0, MVT::i8)); @@ -14469,16 +14460,13 @@ static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v32i8, &pshufbMask[0], 32); Op = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v32i8, Op, BV); - Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i64, Op); static const int ShufMask[] = {0, 2, -1, -1}; Op = DAG.getVectorShuffle(MVT::v4i64, dl, Op, DAG.getUNDEF(MVT::v4i64), &ShufMask[0]); - Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op, DAG.getIntPtrConstant(0)); - return DAG.getNode(ISD::BITCAST, dl, VT, Op); } @@ -14491,7 +14479,7 @@ static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLo); OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpHi); - // PSHUFB + // The PSHUFB mask: static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}; @@ -14502,9 +14490,8 @@ static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo); OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi); - // MOVLHPS + // The MOVLHPS Mask: static const int ShufMask2[] = {0, 1, 4, 5}; - SDValue res = DAG.getVectorShuffle(MVT::v4i32, dl, OpLo, OpHi, ShufMask2); return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, res); } -- cgit v1.2.3-18-g5258 From 3cdc3827ce418feb03fddf863bf2742c69a51bcd Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 26 Dec 2012 05:49:15 +0000 Subject: Use an additional multiclass to merge the 128/256-bit SSE/AVX instruction definitions for a bunch of SSE2 integer arithmetic instructions. 
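The PDI_binop_all multiclass stamps out the legacy SSE two-address form, the AVX 128-bit VEX form, and the AVX2 256-bit form from a single defm. A loose C++ analogy of the factoring, with made-up names purely for illustration (TableGen's # paste operator does the real work):

  #include <string>
  #include <vector>

  struct InstDef { std::string Name; unsigned Opc; unsigned VecBits; };

  // One call replaces three hand-written definition blocks.
  std::vector<InstDef> defineBinopAll(unsigned Opc, const std::string &Base) {
    return {
      {"P" + Base, Opc, 128},        // SSE2 two-address form
      {"VP" + Base, Opc, 128},       // AVX three-address VEX form
      {"VP" + Base + "Y", Opc, 256}, // AVX2 256-bit form
    };
  }
  // e.g. defineBinopAll(0xFC, "ADDB") yields PADDB, VPADDB, VPADDBY.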
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171092 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 150 ++++++++++++------------------------------ 1 file changed, 42 insertions(+), 108 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 9c1166a1b7..87763b7bdc 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2680,27 +2680,29 @@ multiclass PDI_binop_rm opc, string OpcodeStr, SDNode OpNode, } // ExeDomain = SSEPackedInt multiclass PDI_binop_all opc, string OpcodeStr, SDNode Opcode, + ValueType OpVT128, ValueType OpVT256, OpndItins itins, bit IsCommutable = 0> { let Predicates = [HasAVX] in - defm VP#NAME# : PDI_binop_rm, VEX_4V; let Constraints = "$src1 = $dst" in - defm P#NAME# : PDI_binop_rm; let Predicates = [HasAVX2] in - defm VP#NAME#Y : PDI_binop_rm, VEX_4V, VEX_L; + defm VP#NAME#Y : PDI_binop_rm, VEX_4V, VEX_L; } // These are ordered here for pattern ordering requirements with the fp versions -defm AND : PDI_binop_all<0xDB, "pand", and, SSE_BIT_ITINS_P, 1>; -defm OR : PDI_binop_all<0xEB, "por", or, SSE_BIT_ITINS_P, 1>; -defm XOR : PDI_binop_all<0xEF, "pxor", xor, SSE_BIT_ITINS_P, 1>; -defm ANDN : PDI_binop_all<0xDF, "pandn", X86andnp, SSE_BIT_ITINS_P, 0>; +defm AND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64, SSE_BIT_ITINS_P, 1>; +defm OR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64, SSE_BIT_ITINS_P, 1>; +defm XOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64, SSE_BIT_ITINS_P, 1>; +defm ANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64, + SSE_BIT_ITINS_P, 0>; //===----------------------------------------------------------------------===// // SSE 1 & 2 - Logical Instructions @@ -3680,45 +3682,43 @@ multiclass PDI_binop_rm2 opc, string OpcodeStr, SDNode OpNode, } } // ExeDomain = SSEPackedInt +defm ADDB : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8, + SSE_INTALU_ITINS_P, 1>; +defm ADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16, + SSE_INTALU_ITINS_P, 1>; +defm ADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32, + SSE_INTALU_ITINS_P, 1>; +defm ADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64, + SSE_INTALUQ_ITINS_P, 1>; +defm MULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16, + SSE_INTMUL_ITINS_P, 1>; +defm SUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8, + SSE_INTALU_ITINS_P, 0>; +defm SUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16, + SSE_INTALU_ITINS_P, 0>; +defm SUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32, + SSE_INTALU_ITINS_P, 0>; +defm SUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64, + SSE_INTALUQ_ITINS_P, 0>; +defm SUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8, + SSE_INTALU_ITINS_P, 0>; +defm SUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16, + SSE_INTALU_ITINS_P, 0>; +defm MINUB : PDI_binop_all<0xDA, "pminub", X86umin, v16i8, v32i8, + SSE_INTALU_ITINS_P, 1>; +defm MINSW : PDI_binop_all<0xEA, "pminsw", X86smin, v8i16, v16i16, + SSE_INTALU_ITINS_P, 1>; +defm MAXUB : PDI_binop_all<0xDE, "pmaxub", X86umax, v16i8, v32i8, + SSE_INTALU_ITINS_P, 1>; +defm MAXSW : PDI_binop_all<0xEE, "vpmaxsw", X86smax, v8i16, v16i16, + SSE_INTALU_ITINS_P, 1>; + // 128-bit Integer Arithmetic let Predicates = [HasAVX] in { -defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 1, 0 /*3addr*/>, - VEX_4V; -defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 1, 0>, 
VEX_4V; -defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPADDQ : PDI_binop_rm<0xD4, "vpaddq", add, v2i64, VR128, memopv2i64, - i128mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V; -defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, VR128, memopv2i64, - i128mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V; -defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; -defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; -defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; -defm VPSUBQ : PDI_binop_rm<0xFB, "vpsubq", sub, v2i64, VR128, memopv2i64, - i128mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V; -defm VPSUBUSB : PDI_binop_rm<0xD8, "vpsubusb", X86subus, v16i8, VR128, - memopv2i64, i128mem, SSE_INTALU_ITINS_P, 0, 0>, - VEX_4V; -defm VPSUBUSW : PDI_binop_rm<0xD9, "vpsubusw", X86subus, v8i16, VR128, - memopv2i64, i128mem, SSE_INTALU_ITINS_P, 0, 0>, - VEX_4V; defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128, memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V; -defm VPMINUB : PDI_binop_rm<0xDA, "vpminub", X86umin, v16i8, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPMINSW : PDI_binop_rm<0xEA, "vpminsw", X86smin, v8i16, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPMAXUB : PDI_binop_rm<0xDE, "vpmaxub", X86umax, v16i8, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPMAXSW : PDI_binop_rm<0xEE, "vpmaxsw", X86smax, v8i16, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; // Intrinsic forms defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, @@ -3760,45 +3760,9 @@ defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, } let Predicates = [HasAVX2] in { -defm VPADDBY : PDI_binop_rm<0xFC, "vpaddb", add, v32i8, VR256, memopv4i64, - i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPADDWY : PDI_binop_rm<0xFD, "vpaddw", add, v16i16, VR256, memopv4i64, - i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPADDDY : PDI_binop_rm<0xFE, "vpaddd", add, v8i32, VR256, memopv4i64, - i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPADDQY : PDI_binop_rm<0xD4, "vpaddq", add, v4i64, VR256, memopv4i64, - i256mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMULLWY : PDI_binop_rm<0xD5, "vpmullw", mul, v16i16, VR256, memopv4i64, - i256mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPSUBBY : PDI_binop_rm<0xF8, "vpsubb", sub, v32i8, VR256, memopv4i64, - i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; -defm VPSUBWY : PDI_binop_rm<0xF9, "vpsubw", sub, v16i16,VR256, memopv4i64, - i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; -defm VPSUBDY : PDI_binop_rm<0xFA, "vpsubd", sub, v8i32, VR256, memopv4i64, - i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; -defm VPSUBQY : PDI_binop_rm<0xFB, "vpsubq", sub, v4i64, VR256, memopv4i64, - i256mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V, VEX_L; -defm VPSUBUSBY : PDI_binop_rm<0xD8, "vpsubusb", X86subus, v32i8, VR256, - memopv4i64, i256mem, SSE_INTALU_ITINS_P, 0, 0>, - VEX_4V, VEX_L; -defm VPSUBUSWY : PDI_binop_rm<0xD9, "vpsubusw", X86subus, v16i16, VR256, - memopv4i64, i256mem, SSE_INTALU_ITINS_P, 0, 0>, - VEX_4V, VEX_L; defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32, VR256, memopv4i64, i256mem, 
SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMINUBY : PDI_binop_rm<0xDA, "vpminub", X86umin, v32i8, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMINSWY : PDI_binop_rm<0xEA, "vpminsw", X86smin, v16i16, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMAXUBY : PDI_binop_rm<0xDE, "vpmaxub", X86umax, v32i8, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMAXSWY : PDI_binop_rm<0xEE, "vpmaxsw", X86smax, v16i16, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; // Intrinsic forms defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b, @@ -3840,38 +3804,8 @@ defm VPSADBWY : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw, } let Constraints = "$src1 = $dst" in { -defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 1>; -defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 1>; -defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 1>; -defm PADDQ : PDI_binop_rm<0xD4, "paddq", add, v2i64, VR128, memopv2i64, - i128mem, SSE_INTALUQ_ITINS_P, 1>; -defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, VR128, memopv2i64, - i128mem, SSE_INTMUL_ITINS_P, 1>; -defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P>; -defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P>; -defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P>; -defm PSUBQ : PDI_binop_rm<0xFB, "psubq", sub, v2i64, VR128, memopv2i64, - i128mem, SSE_INTALUQ_ITINS_P>; -defm PSUBUSB : PDI_binop_rm<0xD8, "psubusb", X86subus, v16i8, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P>; -defm PSUBUSW : PDI_binop_rm<0xD9, "psubusw", X86subus, v8i16, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P>; defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128, memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>; -defm PMINUB : PDI_binop_rm<0xDA, "pminub", X86umin, v16i8, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 1>; -defm PMINSW : PDI_binop_rm<0xEA, "pminsw", X86smin, v8i16, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 1>; -defm PMAXUB : PDI_binop_rm<0xDE, "pmaxub", X86umax, v16i8, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 1>; -defm PMAXSW : PDI_binop_rm<0xEE, "pmaxsw", X86smax, v8i16, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 1>; // Intrinsic forms defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b, -- cgit v1.2.3-18-g5258 From 02082efaab9d66a64b7f9d0c4c882fa8ddad1f63 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 26 Dec 2012 06:02:15 +0000 Subject: Remove 'v' from mnemonic to fix asm matching failures. 
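The merged multiclass already builds the AVX mnemonic by prepending "v" to the base string, so spelling the base as "vpmaxsw" yielded a doubled "vvpmaxsw" that the assembly matcher could not resolve. The same bug in miniature (illustrative C++ only):

  #include <cassert>
  #include <string>

  int main() {
    std::string Base = "pmaxsw";           // correct base mnemonic
    assert("v" + Base == "vpmaxsw");       // AVX form the matcher expects
    std::string Bad = "v" + std::string("vpmaxsw");
    assert(Bad == "vvpmaxsw");             // what the stray 'v' produced
    return 0;
  }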
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171093 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 87763b7bdc..2348022715 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3710,7 +3710,7 @@ defm MINSW : PDI_binop_all<0xEA, "pminsw", X86smin, v8i16, v16i16, SSE_INTALU_ITINS_P, 1>; defm MAXUB : PDI_binop_all<0xDE, "pmaxub", X86umax, v16i8, v32i8, SSE_INTALU_ITINS_P, 1>; -defm MAXSW : PDI_binop_all<0xEE, "vpmaxsw", X86smax, v8i16, v16i16, +defm MAXSW : PDI_binop_all<0xEE, "pmaxsw", X86smax, v8i16, v16i16, SSE_INTALU_ITINS_P, 1>; // 128-bit Integer Arithmetic -- cgit v1.2.3-18-g5258 From 219bc2db1fc4222f63e059f55df3cfb3532e830f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 26 Dec 2012 06:14:15 +0000 Subject: Merge SSE/AVX PCMPEQ/PCMPGT instruction definitions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171095 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 74 +++++++------------------------------------ 1 file changed, 12 insertions(+), 62 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 2348022715..ad51e53bd9 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4028,68 +4028,18 @@ let Predicates = [UseSSE2] in { // SSE2 - Packed Integer Comparison Instructions //===---------------------------------------------------------------------===// -let Predicates = [HasAVX] in { - defm VPCMPEQB : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v16i8, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; - defm VPCMPEQW : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v8i16, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; - defm VPCMPEQD : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v4i32, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; - defm VPCMPGTB : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v16i8, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; - defm VPCMPGTW : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v8i16, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; - defm VPCMPGTD : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v4i32, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; -} - -let Predicates = [HasAVX2] in { - defm VPCMPEQBY : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v32i8, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; - defm VPCMPEQWY : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v16i16, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; - defm VPCMPEQDY : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v8i32, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; - defm VPCMPGTBY : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v32i8, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; - defm VPCMPGTWY : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v16i16, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; - defm VPCMPGTDY : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v8i32, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; -} - -let Constraints = "$src1 = $dst" in { - defm PCMPEQB : PDI_binop_rm<0x74, "pcmpeqb", X86pcmpeq, v16i8, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; - defm PCMPEQW : 
PDI_binop_rm<0x75, "pcmpeqw", X86pcmpeq, v8i16, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; - defm PCMPEQD : PDI_binop_rm<0x76, "pcmpeqd", X86pcmpeq, v4i32, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; - defm PCMPGTB : PDI_binop_rm<0x64, "pcmpgtb", X86pcmpgt, v16i8, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P>; - defm PCMPGTW : PDI_binop_rm<0x65, "pcmpgtw", X86pcmpgt, v8i16, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P>; - defm PCMPGTD : PDI_binop_rm<0x66, "pcmpgtd", X86pcmpgt, v4i32, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P>; -} // Constraints = "$src1 = $dst" +defm CMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8, + SSE_INTALU_ITINS_P, 1>; +defm CMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16, + SSE_INTALU_ITINS_P, 1>; +defm CMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32, + SSE_INTALU_ITINS_P, 1>; +defm CMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8, + SSE_INTALU_ITINS_P, 0>; +defm CMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16, + SSE_INTALU_ITINS_P, 0>; +defm CMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32, + SSE_INTALU_ITINS_P, 0>; //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Pack Instructions -- cgit v1.2.3-18-g5258 From 6f9d44e072fd119826a7524d6501a5f4fde9fbff Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 26 Dec 2012 06:15:09 +0000 Subject: Fix class name in comment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171096 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index ad51e53bd9..7edfac904b 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3659,7 +3659,7 @@ multiclass PDI_binop_rmi opc, bits<8> opc2, Format ImmForm, [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>; } -/// PDI_binop_rm - Simple SSE2 binary operator with different src and dst types +/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types multiclass PDI_binop_rm2 opc, string OpcodeStr, SDNode OpNode, ValueType DstVT, ValueType SrcVT, RegisterClass RC, PatFrag memop_frag, X86MemOperand x86memop, -- cgit v1.2.3-18-g5258 From 755841d9d74c9e567b2aca5ae1bccb8a06d94bfc Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 26 Dec 2012 06:15:53 +0000 Subject: Fix 80 column violation. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171097 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 7edfac904b..a931d66020 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2687,8 +2687,8 @@ let Predicates = [HasAVX] in VR128, memopv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V; let Constraints = "$src1 = $dst" in - defm P#NAME# : PDI_binop_rm; + defm P#NAME# : PDI_binop_rm; let Predicates = [HasAVX2] in defm VP#NAME#Y : PDI_binop_rm Date: Wed, 26 Dec 2012 07:20:35 +0000 Subject: Merge more SSE/AVX instruction definitions. 
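These stay intrinsic-only forms because operations like saturating add have no direct equivalent in plain IR. For reference, the per-lane behaviour behind the int_x86_sse2_padds_b / int_x86_avx2_padds_b pair merged here, as a scalar model (an illustration, not LLVM code):

  #include <cstdint>

  // One lane of PADDSB/VPADDSB: signed byte add with saturation.
  int8_t padds_b_lane(int8_t a, int8_t b) {
    int Sum = a + b;
    if (Sum > INT8_MAX) return INT8_MAX;
    if (Sum < INT8_MIN) return INT8_MIN;
    return static_cast<int8_t>(Sum);
  }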
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171102 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 178 ++++++++++++------------------------------ 1 file changed, 49 insertions(+), 129 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index a931d66020..166ea3611d 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2657,10 +2657,8 @@ let ExeDomain = SSEPackedInt in { // SSE integer instructions /// PDI_binop_rm - Simple SSE2 binary operator. multiclass PDI_binop_rm opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, RegisterClass RC, PatFrag memop_frag, - X86MemOperand x86memop, - OpndItins itins, - bit IsCommutable = 0, - bit Is2Addr = 1> { + X86MemOperand x86memop, OpndItins itins, + bit IsCommutable, bit Is2Addr> { let isCommutable = IsCommutable in def rr : PDI; + memopv2i64, i128mem, itins, IsCommutable, 1>; let Predicates = [HasAVX2] in defm VP#NAME#Y : PDI_binop_rm opc, string OpcodeStr, Intrinsic IntId, itins.rm>; } +multiclass PDI_binop_all_int opc, string OpcodeStr, Intrinsic IntId128, + Intrinsic IntId256, OpndItins itins, + bit IsCommutable = 0> { +let Predicates = [HasAVX] in + defm VP#NAME# : PDI_binop_rm_int, VEX_4V; + +let Constraints = "$src1 = $dst" in + defm P#NAME# : PDI_binop_rm_int; + +let Predicates = [HasAVX2] in + defm VP#NAME#Y : PDI_binop_rm_int, VEX_4V, VEX_L; +} + multiclass PDI_binop_rmi opc, bits<8> opc2, Format ImmForm, string OpcodeStr, SDNode OpNode, SDNode OpNode2, RegisterClass RC, @@ -3713,140 +3729,44 @@ defm MAXUB : PDI_binop_all<0xDE, "pmaxub", X86umax, v16i8, v32i8, defm MAXSW : PDI_binop_all<0xEE, "pmaxsw", X86smax, v8i16, v16i16, SSE_INTALU_ITINS_P, 1>; -// 128-bit Integer Arithmetic +// Intrinsic forms +defm SUBSB : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b, + int_x86_avx2_psubs_b, SSE_INTALU_ITINS_P, 0>; +defm SUBSW : PDI_binop_all_int<0xE9, "psubsw" , int_x86_sse2_psubs_w, + int_x86_avx2_psubs_w, SSE_INTALU_ITINS_P, 0>; +defm ADDSB : PDI_binop_all_int<0xEC, "paddsb" , int_x86_sse2_padds_b, + int_x86_avx2_padds_b, SSE_INTALU_ITINS_P, 1>; +defm ADDSW : PDI_binop_all_int<0xED, "paddsw" , int_x86_sse2_padds_w, + int_x86_avx2_padds_w, SSE_INTALU_ITINS_P, 1>; +defm ADDUSB : PDI_binop_all_int<0xDC, "paddusb", int_x86_sse2_paddus_b, + int_x86_avx2_paddus_b, SSE_INTALU_ITINS_P, 1>; +defm ADDUSW : PDI_binop_all_int<0xDD, "paddusw", int_x86_sse2_paddus_w, + int_x86_avx2_paddus_w, SSE_INTALU_ITINS_P, 1>; +defm MULHUW : PDI_binop_all_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, + int_x86_avx2_pmulhu_w, SSE_INTMUL_ITINS_P, 1>; +defm MULHW : PDI_binop_all_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, + int_x86_avx2_pmulh_w, SSE_INTMUL_ITINS_P, 1>; +defm MADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, + int_x86_avx2_pmadd_wd, SSE_PMADD, 1>; +defm AVGB : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b, + int_x86_avx2_pavg_b, SSE_INTALU_ITINS_P, 1>; +defm AVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w, + int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>; +defm SADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw, + int_x86_avx2_psad_bw, SSE_INTALU_ITINS_P, 1>; -let Predicates = [HasAVX] in { +let Predicates = [HasAVX] in defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128, memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V; - -// Intrinsic forms -defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, - VR128, memopv2i64, i128mem, 
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; -defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; -defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w, - VR128, memopv2i64, i128mem, - SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V; -defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w, - VR128, memopv2i64, i128mem, - SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V; -defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd, - VR128, memopv2i64, i128mem, - SSE_PMADD, 1, 0>, VEX_4V; -defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -} - -let Predicates = [HasAVX2] in { +let Predicates = [HasAVX2] in defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32, VR256, memopv4i64, i256mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; - -// Intrinsic forms -defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; -defm VPSUBSWY : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_avx2_psubs_w, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; -defm VPADDSBY : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_avx2_padds_b, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPADDSWY : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_avx2_padds_w, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPADDUSBY : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_avx2_paddus_b, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPADDUSWY : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_avx2_paddus_w, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMULHUWY : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_avx2_pmulhu_w, - VR256, memopv4i64, i256mem, - SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMULHWY : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_avx2_pmulh_w, - VR256, memopv4i64, i256mem, - SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMADDWDY : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_avx2_pmadd_wd, - VR256, memopv4i64, i256mem, - SSE_PMADD, 1, 0>, VEX_4V, VEX_L; -defm VPAVGBY : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_avx2_pavg_b, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPAVGWY : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_avx2_pavg_w, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPSADBWY : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, 
VEX_4V, VEX_L; -} - -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst" in defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128, memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>; -// Intrinsic forms -defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P>; -defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P>; -defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; -defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; -defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; -defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; -defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, - VR128, memopv2i64, i128mem, - SSE_INTMUL_ITINS_P, 1>; -defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, - VR128, memopv2i64, i128mem, - SSE_INTMUL_ITINS_P, 1>; -defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, - VR128, memopv2i64, i128mem, - SSE_PMADD, 1>; -defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; -defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; -defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; - -} // Constraints = "$src1 = $dst" - //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Logical Instructions //===---------------------------------------------------------------------===// -- cgit v1.2.3-18-g5258 From 0a5ead92ff44416244d32ceafb1b5435b3cca73b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 26 Dec 2012 07:54:43 +0000 Subject: Merge still more SSE/AVX instruction definitions. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171103 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 58 +++++++++++-------------------------------- 1 file changed, 15 insertions(+), 43 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 166ea3611d..47ddb061f5 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2681,12 +2681,12 @@ multiclass PDI_binop_all opc, string OpcodeStr, SDNode Opcode, ValueType OpVT128, ValueType OpVT256, OpndItins itins, bit IsCommutable = 0> { let Predicates = [HasAVX] in - defm VP#NAME# : PDI_binop_rm, VEX_4V; let Constraints = "$src1 = $dst" in - defm P#NAME# : PDI_binop_rm; + defm P#NAME : PDI_binop_rm; let Predicates = [HasAVX2] in defm VP#NAME#Y : PDI_binop_rm opc, string OpcodeStr, Intrinsic IntId128, Intrinsic IntId256, OpndItins itins, bit IsCommutable = 0> { let Predicates = [HasAVX] in - defm VP#NAME# : PDI_binop_rm_int, VEX_4V; + defm VP#NAME : PDI_binop_rm_int, VEX_4V; let Constraints = "$src1 = $dst" in - defm P#NAME# : PDI_binop_rm_int; + defm P#NAME : PDI_binop_rm_int; let Predicates = [HasAVX2] in defm VP#NAME#Y : PDI_binop_rm_int, VEX_4V; -defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; -defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; -} - -let Predicates = [HasAVX2] in { -defm VPACKSSWBY : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_avx2_packsswb, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; -defm VPACKSSDWY : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_avx2_packssdw, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; -defm VPACKUSWBY : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_avx2_packuswb, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; -} - -let Constraints = "$src1 = $dst" in { -defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P>; -defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P>; -defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P>; -} // Constraints = "$src1 = $dst" +// FIXME: Names are bad due to the need to have a 'P' prefix in the multiclass. +defm ACKSSWB : PDI_binop_all_int<0x63, "packsswb", int_x86_sse2_packsswb_128, + int_x86_avx2_packsswb, SSE_INTALU_ITINS_P, 0>; +defm ACKSSDW : PDI_binop_all_int<0x6B, "packssdw", int_x86_sse2_packssdw_128, + int_x86_avx2_packssdw, SSE_INTALU_ITINS_P, 0>; +defm ACKUSWB : PDI_binop_all_int<0x67, "packuswb", int_x86_sse2_packuswb_128, + int_x86_avx2_packuswb, SSE_INTALU_ITINS_P, 0>; //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Shuffle Instructions -- cgit v1.2.3-18-g5258 From a85cbfeba79263a6afc5543de21a5cea83dcfa9e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 26 Dec 2012 21:04:30 +0000 Subject: Remove a special conditional setting of neverHasSideEffects if the instruction didn't have a pattern. This was leftover from when tablegen used to complain if things were already inferred from patterns. 
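When a definition carries a selection pattern, TableGen infers flags such as mayLoad, mayStore and hasSideEffects from that pattern, so conditionally forcing the flag for the pattern-less case is now redundant. The decision being removed, sketched in C++ with illustrative names (a simplification of the real inference):

  struct InstrDef {
    bool HasPattern;            // definition carries a selection pattern
    bool ExplicitNoSideFx;      // flag written by hand in the .td file
  };

  bool hasSideEffects(const InstrDef &I) {
    if (I.HasPattern)
      return false;             // inferred from the (assumed pure) pattern
    return !I.ExplicitNoSideFx; // pattern-less: trust the explicit flag
  }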
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171117 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 47ddb061f5..885fa1a595 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -203,9 +203,8 @@ multiclass sse12_fp_packed opc, string OpcodeStr, SDNode OpNode, multiclass sse12_fp_packed_logical_rm opc, RegisterClass RC, Domain d, string OpcodeStr, X86MemOperand x86memop, list pat_rr, list pat_rm, - bit Is2Addr = 1, - bit rr_hasSideEffects = 0> { - let isCommutable = 1, neverHasSideEffects = rr_hasSideEffects in + bit Is2Addr = 1> { + let isCommutable = 1, hasSideEffects = 0 in def rr : PI opc, string OpcodeStr, defm V#NAME#PS : sse12_fp_packed_logical_rm, TB, VEX_4V; + (memopv2i64 addr:$src2)))], 0>, TB, VEX_4V; defm V#NAME#PD : sse12_fp_packed_logical_rm Date: Wed, 26 Dec 2012 21:30:22 +0000 Subject: Mark all the _REV instructions as not having side effects. They aren't really emitted by the backend, but it reduces the number of instructions in the output files with unmodelled side effects to make auditing easier. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171118 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrArithmetic.td | 1 + lib/Target/X86/X86InstrFMA.td | 4 ++-- lib/Target/X86/X86InstrInfo.td | 2 +- lib/Target/X86/X86InstrSSE.td | 12 ++++++------ 4 files changed, 10 insertions(+), 9 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 478c42d657..874b9aae09 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -690,6 +690,7 @@ class BinOpRR_Rev opcode, string mnemonic, X86TypeInfo typeinfo> mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; + let hasSideEffects = 0; } // BinOpRR_F_Rev - Instructions like "cmp reg, reg" (reversed encoding). 
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index d360a73b34..f48f133b12 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -220,7 +220,7 @@ multiclass fma4s opc, string OpcodeStr, RegisterClass RC, [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>; // For disassembler -let isCodeGenOnly = 1 in +let isCodeGenOnly = 1, hasSideEffects = 0 in def rr_REV : FMA4 opc, string OpcodeStr, SDNode OpNode, [(set VR256:$dst, (OpNode VR256:$src1, (ld_frag256 addr:$src2), VR256:$src3))]>, VEX_L; // For disassembler -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, hasSideEffects = 0 in { def rr_REV : FMA4; def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 885fa1a595..70d1de55a0 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -480,7 +480,7 @@ def VMOVSDrr : sse12_move_rr, XD; // For the disassembler - let isCodeGenOnly = 1 in { + let isCodeGenOnly = 1, hasSideEffects = 0 in { def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src1, FR32:$src2), "movss\t{$src2, $dst|$dst, $src2}", [], @@ -869,7 +869,7 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), IIC_SSE_MOVU_P_MR>, VEX, VEX_L; // For disassembler -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, hasSideEffects = 0 in { def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [], @@ -943,7 +943,7 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), IIC_SSE_MOVU_P_MR>; // For disassembler -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, hasSideEffects = 0 in { def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>; @@ -3485,7 +3485,7 @@ def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), } // For Disassembler -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, hasSideEffects = 0 in { def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, @@ -3548,7 +3548,7 @@ def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; // For Disassembler -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, hasSideEffects = 0 in { def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>; -- cgit v1.2.3-18-g5258 From b87a5b3a1fa080efea810945d7fdeee51aef0888 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 26 Dec 2012 22:19:23 +0000 Subject: Mark the AL/AX/EAX forms of the basic arithmetic operations as never having side effects.
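BinOpAI covers the short accumulator encodings such as "add al, imm8" and "add eax, imm32"; their entire effect is the implicit accumulator use/def plus EFLAGS, all of which is modelled, so nothing justifies a side-effect flag. What hasSideEffects = 0 promises, in a scalar sketch (only ZF modelled here, for illustration):

  #include <cstdint>

  struct Result { uint8_t AL; bool ZF; }; // the modelled outputs

  // "add al, imm8": the outputs fully describe the effect, so the
  // instruction is dead code whenever Result goes unused.
  Result add_al_imm8(uint8_t AL, uint8_t Imm) {
    uint8_t Sum = static_cast<uint8_t>(AL + Imm);
    return {Sum, Sum == 0};
  }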
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171121 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrArithmetic.td | 87 ++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 43 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 874b9aae09..6bda62879e 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -57,7 +57,7 @@ def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), - "mul{w}\t$src", + "mul{w}\t$src", [], IIC_MUL16_REG>, OpSize; // AX,DX = AX*GR16 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in @@ -158,7 +158,7 @@ def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst), (X86smul_flag GR16:$src1, (load addr:$src2)))], IIC_IMUL16_RM>, TB, OpSize; -def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), +def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), "imul{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, EFLAGS, @@ -182,8 +182,8 @@ let Defs = [EFLAGS] in { def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16 (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR16:$dst, EFLAGS, - (X86smul_flag GR16:$src1, imm:$src2))], + [(set GR16:$dst, EFLAGS, + (X86smul_flag GR16:$src1, imm:$src2))], IIC_IMUL16_RRI>, OpSize; def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8 (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), @@ -319,7 +319,7 @@ let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX "idiv{w}\t$src", [], IIC_IDIV16>, OpSize; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX -def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), +def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), "idiv{l}\t$src", [], IIC_IDIV32>; let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), @@ -412,11 +412,11 @@ def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), IIC_UNARY_REG>; let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. -def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), +def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), "inc{w}\t$dst", [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))], IIC_UNARY_REG>, OpSize, Requires<[In32BitMode]>; -def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), +def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), "inc{l}\t$dst", [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))], IIC_UNARY_REG>, @@ -430,22 +430,22 @@ def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst", // In 64-bit mode, single byte INC and DEC cannot be encoded. let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can transform into LEA. 
-def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1), +def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1), "inc{w}\t$dst", [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))], IIC_UNARY_REG>, OpSize, Requires<[In64BitMode]>; -def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1), +def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1), "inc{l}\t$dst", [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))], IIC_UNARY_REG>, Requires<[In64BitMode]>; -def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1), +def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1), "dec{w}\t$dst", [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))], IIC_UNARY_REG>, OpSize, Requires<[In64BitMode]>; -def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), +def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), "dec{l}\t$dst", [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))], IIC_UNARY_REG>, @@ -469,7 +469,7 @@ let CodeSize = 2 in { def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst", [(store (add (loadi64 addr:$dst), 1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; - + // These are duplicates of their 32-bit counterparts. Only needed so X86 knows // how to unfold them. // FIXME: What is this for?? @@ -498,12 +498,12 @@ def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))], IIC_UNARY_REG>; let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. -def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), +def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), "dec{w}\t$dst", [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))], IIC_UNARY_REG>, OpSize, Requires<[In32BitMode]>; -def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), +def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), "dec{l}\t$dst", [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))], IIC_UNARY_REG>, @@ -544,57 +544,57 @@ class X86TypeInfo { /// VT - This is the value type itself. ValueType VT = vt; - + /// InstrSuffix - This is the suffix used on instructions with this type. For /// example, i8 -> "b", i16 -> "w", i32 -> "l", i64 -> "q". string InstrSuffix = instrsuffix; - + /// RegClass - This is the register class associated with this type. For /// example, i8 -> GR8, i16 -> GR16, i32 -> GR32, i64 -> GR64. RegisterClass RegClass = regclass; - + /// LoadNode - This is the load node associated with this type. For /// example, i8 -> loadi8, i16 -> loadi16, i32 -> loadi32, i64 -> loadi64. PatFrag LoadNode = loadnode; - + /// MemOperand - This is the memory operand associated with this type. For /// example, i8 -> i8mem, i16 -> i16mem, i32 -> i32mem, i64 -> i64mem. X86MemOperand MemOperand = memoperand; - + /// ImmEncoding - This is the encoding of an immediate of this type. For /// example, i8 -> Imm8, i16 -> Imm16, i32 -> Imm32. Note that i64 -> Imm32 /// since the immediate fields of i64 instructions is a 32-bit sign extended /// value. ImmType ImmEncoding = immkind; - + /// ImmOperand - This is the operand kind of an immediate of this type. For /// example, i8 -> i8imm, i16 -> i16imm, i32 -> i32imm. Note that i64 -> /// i64i32imm since the immediate fields of i64 instructions is a 32-bit sign /// extended value. Operand ImmOperand = immoperand; - + /// ImmOperator - This is the operator that should be used to match an /// immediate of this kind in a pattern (e.g. imm, or i64immSExt32). 
SDPatternOperator ImmOperator = immoperator; - + /// Imm8Operand - This is the operand kind to use for an imm8 of this type. /// For example, i8 -> , i16 -> i16i8imm, i32 -> i32i8imm. This is /// only used for instructions that have a sign-extended imm8 field form. Operand Imm8Operand = imm8operand; - + /// Imm8Operator - This is the operator that should be used to match an 8-bit /// sign extended immediate of this kind in a pattern (e.g. imm16immSExt8). SDPatternOperator Imm8Operator = imm8operator; - + /// HasOddOpcode - This bit is true if the instruction should have an odd (as /// opposed to even) opcode. Operations on i8 are usually even, operations on /// other datatypes are odd. bit HasOddOpcode = hasOddOpcode; - + /// HasOpSizePrefix - This bit is set to true if the instruction should have /// the 0x66 operand size prefix. This is set for i16 types. bit HasOpSizePrefix = hasOpSizePrefix; - + /// HasREX_WPrefix - This bit is set to true if the instruction should have /// the 0x40 REX prefix. This is set for i64 types. bit HasREX_WPrefix = hasREX_WPrefix; @@ -624,12 +624,12 @@ def Xi64 : X86TypeInfo opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins, +class ITy opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins, string mnemonic, string args, list pattern, InstrItinClass itin = IIC_BIN_NONMEM> : I<{opcode{7}, opcode{6}, opcode{5}, opcode{4}, opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode }, - f, outs, ins, + f, outs, ins, !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern, itin> { @@ -765,13 +765,13 @@ class BinOpRI_F opcode, string mnemonic, X86TypeInfo typeinfo, class BinOpRI_RF opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode, Format f> : BinOpRI; // BinOpRI_RFF - Instructions like "adc reg, reg, imm". class BinOpRI_RFF opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode, Format f> : BinOpRI; @@ -790,7 +790,7 @@ class BinOpRI8_R opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpRI8; - + // BinOpRI8_F - Instructions like "cmp reg, imm8". class BinOpRI8_F opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode, Format f> @@ -853,14 +853,14 @@ class BinOpMI - : BinOpMI; // BinOpMI_RMW_FF - Instructions like "adc [mem], imm". 
class BinOpMI_RMW_FF - : BinOpMI; @@ -868,7 +868,7 @@ class BinOpMI_RMW_FF opcode = 0x80> - : BinOpMI; @@ -914,6 +914,7 @@ class BinOpAI opcode, string mnemonic, X86TypeInfo typeinfo, let ImmT = typeinfo.ImmEncoding; let Uses = [areg]; let Defs = [areg]; + let hasSideEffects = 0; } /// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is @@ -969,7 +970,7 @@ multiclass ArithBinOp_RF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, def #NAME#16mi8 : BinOpMI8_RMW; def #NAME#32mi8 : BinOpMI8_RMW; def #NAME#64mi8 : BinOpMI8_RMW; - + def #NAME#8mi : BinOpMI_RMW; def #NAME#16mi : BinOpMI_RMW; def #NAME#32mi : BinOpMI_RMW; @@ -983,7 +984,7 @@ multiclass ArithBinOp_RF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, "{$src, %eax|EAX, $src}">; def #NAME#64i32 : BinOpAI; - } + } } /// ArithBinOp_RFF - This is an arithmetic binary operator where the pattern is @@ -1040,7 +1041,7 @@ multiclass ArithBinOp_RFF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, def #NAME#16mi8 : BinOpMI8_RMW_FF; def #NAME#32mi8 : BinOpMI8_RMW_FF; def #NAME#64mi8 : BinOpMI8_RMW_FF; - + def #NAME#8mi : BinOpMI_RMW_FF; def #NAME#16mi : BinOpMI_RMW_FF; def #NAME#32mi : BinOpMI_RMW_FF; @@ -1052,9 +1053,9 @@ multiclass ArithBinOp_RFF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, "{$src, %ax|AX, $src}">; def #NAME#32i32 : BinOpAI; - def #NAME#64i32 : BinOpAI; - } + } } /// ArithBinOp_F - This is an arithmetic binary operator where the pattern is @@ -1090,7 +1091,7 @@ multiclass ArithBinOp_F BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, def #NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, opnode, RegMRM>; def #NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>; def #NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>; - + def #NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>; def #NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>; def #NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>; @@ -1107,7 +1108,7 @@ multiclass ArithBinOp_F BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, def #NAME#16mi8 : BinOpMI8_F; def #NAME#32mi8 : BinOpMI8_F; def #NAME#64mi8 : BinOpMI8_F; - + def #NAME#8mi : BinOpMI_F; def #NAME#16mi : BinOpMI_F; def #NAME#32mi : BinOpMI_F; @@ -1121,7 +1122,7 @@ multiclass ArithBinOp_F BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, "{$src, %eax|EAX, $src}">; def #NAME#64i32 : BinOpAI; - } + } } @@ -1181,7 +1182,7 @@ let isCompare = 1, Defs = [EFLAGS] in { def TEST16mi : BinOpMI_F<"test", Xi16, X86testpat, MRM0m, 0xF6>; def TEST32mi : BinOpMI_F<"test", Xi32, X86testpat, MRM0m, 0xF6>; def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>; - + def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL, "{$src, %al|AL, $src}">; def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX, -- cgit v1.2.3-18-g5258 From ee5b63cb526987fa5458b2ad923fb3bbecd0c3a7 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 26 Dec 2012 23:08:12 +0000 Subject: Add hasSideEffects=0 to some atomic instructions. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171122 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrCompiler.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 9e6f27988f..e9e1e10d12 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -559,7 +559,7 @@ defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">; defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">; multiclass PSEUDO_ATOMIC_LOAD_BINOP6432<string mnemonic> { - let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in + let usesCustomInserter = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0 in def #NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), (ins i64mem:$ptr, GR32:$val1, GR32:$val2), !strconcat(mnemonic, "6432 PSEUDO!"), []>; -- cgit v1.2.3-18-g5258 From e9fd6ad5679e08c59613fc2765e8b628e5c1eb95 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 26 Dec 2012 23:27:57 +0000 Subject: Fix operands and encoding form for ARPL instruction. Register form had $dst and $src reversed. Memory form writes memory, but was marked as MRMSrcMem. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171123 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 2301b2327c..93589fbef7 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1486,10 +1486,10 @@ def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), Requires<[In32BitMode]>; // Adjust RPL Field of Segment Selector -def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$src), (ins GR16:$dst), +def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), "arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_REG>, Requires<[In32BitMode]>; -def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst), +def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_MEM>, Requires<[In32BitMode]>; -- cgit v1.2.3-18-g5258 From 5e6a86c7f034d7040b20a785ef4522415cb6ab0f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 27 Dec 2012 02:01:33 +0000 Subject: Add mayLoad, mayStore, and hasSideEffects tags to BT/BTS/BTR/BTC instructions. Shouldn't change any functionality since they don't have patterns to select them. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171128 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.td | 62 +++++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 19 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 93589fbef7..d0d236ab42 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1133,24 +1133,26 @@ def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), // perspective, this is pretty bizarre. Make these instructions disassembly // only for now.
-def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), - "bt{w}\t{$src2, $src1|$src1, $src2}", -// [(X86bt (loadi16 addr:$src1), GR16:$src2), -// (implicit EFLAGS)] - [], IIC_BT_MR - >, OpSize, TB, Requires<[FastBTMem]>; -def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), - "bt{l}\t{$src2, $src1|$src1, $src2}", -// [(X86bt (loadi32 addr:$src1), GR32:$src2), -// (implicit EFLAGS)] - [], IIC_BT_MR - >, TB, Requires<[FastBTMem]>; -def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), - "bt{q}\t{$src2, $src1|$src1, $src2}", -// [(X86bt (loadi64 addr:$src1), GR64:$src2), -// (implicit EFLAGS)] - [], IIC_BT_MR - >, TB; +let mayLoad = 1, hasSideEffects = 0 in { + def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), + "bt{w}\t{$src2, $src1|$src1, $src2}", + // [(X86bt (loadi16 addr:$src1), GR16:$src2), + // (implicit EFLAGS)] + [], IIC_BT_MR + >, OpSize, TB, Requires<[FastBTMem]>; + def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), + "bt{l}\t{$src2, $src1|$src1, $src2}", + // [(X86bt (loadi32 addr:$src1), GR32:$src2), + // (implicit EFLAGS)] + [], IIC_BT_MR + >, TB, Requires<[FastBTMem]>; + def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), + "bt{q}\t{$src2, $src1|$src1, $src2}", + // [(X86bt (loadi64 addr:$src1), GR64:$src2), + // (implicit EFLAGS)] + [], IIC_BT_MR + >, TB; +} def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", @@ -1181,7 +1183,7 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2), [(set EFLAGS, (X86bt (loadi64 addr:$src1), i64immSExt8:$src2))], IIC_BT_MI>, TB; - +let hasSideEffects = 0 in { def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize, TB; @@ -1189,6 +1191,8 @@ def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; + +let mayLoad = 1, mayStore = 1 in { def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize, TB; @@ -1196,6 +1200,8 @@ def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; +} + def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize, TB; @@ -1203,6 +1209,8 @@ def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; + +let mayLoad = 1, mayStore = 1 in { def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize, TB; @@ -1210,6 +1218,7 @@ def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; +} 
def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, @@ -1218,6 +1227,8 @@ def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; + +let mayLoad = 1, mayStore = 1 in { def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize, TB; @@ -1225,6 +1236,8 @@ def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; +} + def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize, TB; @@ -1232,6 +1245,8 @@ def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; + +let mayLoad = 1, mayStore = 1 in { def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize, TB; @@ -1239,6 +1254,7 @@ def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; +} def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, @@ -1247,6 +1263,8 @@ def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; + +let mayLoad = 1, mayStore = 1 in { def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize, TB; @@ -1254,6 +1272,8 @@ def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; +} + def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize, TB; @@ -1261,6 +1281,8 @@ def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; + +let mayLoad = 1, mayStore = 1 in { def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize, TB; @@ -1268,6 +1290,8 @@ def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "bts{q}\t{$src2, 
$src1|$src1, $src2}", [], IIC_BTX_MI>, TB; +} +} // hasSideEffects = 0 } // Defs = [EFLAGS] -- cgit v1.2.3-18-g5258 From 0b9c5e268f4cad1327c03b18d002b74547d2c215 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 27 Dec 2012 02:08:46 +0000 Subject: Add hasSideEffects=0 to CMP*rr_REV. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171130 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrArithmetic.td | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 6bda62879e..693815d42f 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -700,6 +700,7 @@ class BinOpRR_F_Rev opcode, string mnemonic, X86TypeInfo typeinfo> mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; + let hasSideEffects = 0; } // BinOpRM - Instructions like "add reg, reg, [mem]". -- cgit v1.2.3-18-g5258 From 766cbae4b1abcaf451949370c2fae8b339a4dca8 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 27 Dec 2012 03:01:18 +0000 Subject: Mark the divide instructions as hasSideEffects=0. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171136 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrArithmetic.td | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 693815d42f..d56763ea9d 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -266,6 +266,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 // unsigned division/remainder +let hasSideEffects = 0 in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "div{b}\t$src", [], IIC_DIV8_REG>; @@ -325,6 +326,7 @@ let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), "idiv{q}\t$src", [], IIC_IDIV64>; } +} // hasSideEffects = 0 //===----------------------------------------------------------------------===// // Two address Instructions. -- cgit v1.2.3-18-g5258 From 87073aad8fca0663405c4318bf70794e149342ab Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 27 Dec 2012 03:35:44 +0000 Subject: Add hasSideEffects=0 to some shift and rotate instructions. None of which are currently used by code generation. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171137 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrShiftRotate.td | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td index 893488c159..1185941d34 100644 --- a/lib/Target/X86/X86InstrShiftRotate.td +++ b/lib/Target/X86/X86InstrShiftRotate.td @@ -51,6 +51,7 @@ def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), // NOTE: We don't include patterns for shifts of a register by one, because // 'add reg,reg' is cheaper (and we have a Pat pattern for shift-by-one). 
+let hasSideEffects = 0 in { def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1), "shl{b}\t$dst", [], IIC_SR>; def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1), @@ -59,8 +60,9 @@ def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1), "shl{l}\t$dst", [], IIC_SR>; def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1), "shl{q}\t$dst", [], IIC_SR>; +} // hasSideEffects = 0 } // isConvertibleToThreeAddress = 1 -} // Constraints = "$src = $dst" +} // Constraints = "$src = $dst" // FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern @@ -333,6 +335,7 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), // Rotate instructions //===----------------------------------------------------------------------===// +let hasSideEffects = 0 in { let Constraints = "$src1 = $dst" in { def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1), "rcl{b}\t$dst", [], IIC_SR>; @@ -455,6 +458,7 @@ def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst), def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst), "rcr{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; } +} // hasSideEffects = 0 let Constraints = "$src1 = $dst" in { // FIXME: provide shorter instructions when imm8 == 1 -- cgit v1.2.3-18-g5258 From d0f28c09582e66366c3fdb738c3c91d98a2c04a0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 27 Dec 2012 06:34:54 +0000 Subject: Move single letter 'P' prefix out of multiclass now that tablegen allows defm to start with #NAME. This makes instruction names more searchable again. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171141 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 171 +++++++++++++++++++++--------------------- 1 file changed, 85 insertions(+), 86 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 70d1de55a0..9c9628916e 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2680,26 +2680,26 @@ multiclass PDI_binop_all opc, string OpcodeStr, SDNode Opcode, ValueType OpVT128, ValueType OpVT256, OpndItins itins, bit IsCommutable = 0> { let Predicates = [HasAVX] in - defm VP#NAME : PDI_binop_rm, VEX_4V; let Constraints = "$src1 = $dst" in - defm P#NAME : PDI_binop_rm; let Predicates = [HasAVX2] in - defm VP#NAME#Y : PDI_binop_rm, VEX_4V, VEX_L; } // These are ordered here for pattern ordering requirements with the fp versions -defm AND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64, SSE_BIT_ITINS_P, 1>; -defm OR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64, SSE_BIT_ITINS_P, 1>; -defm XOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64, SSE_BIT_ITINS_P, 1>; -defm ANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64, - SSE_BIT_ITINS_P, 0>; +defm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64, SSE_BIT_ITINS_P, 1>; +defm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64, SSE_BIT_ITINS_P, 1>; +defm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64, SSE_BIT_ITINS_P, 1>; +defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64, + SSE_BIT_ITINS_P, 0>; //===----------------------------------------------------------------------===// // SSE 1 & 2 - Logical Instructions @@ -3631,18 +3631,18 @@ multiclass PDI_binop_all_int opc, string OpcodeStr, Intrinsic IntId128, Intrinsic IntId256, OpndItins itins, bit IsCommutable = 0> { let Predicates = [HasAVX] in - defm VP#NAME : PDI_binop_rm_int, VEX_4V; let Constraints = "$src1 = $dst" in - defm P#NAME : PDI_binop_rm_int; let Predicates = 
[HasAVX2] in - defm VP#NAME#Y : PDI_binop_rm_int, VEX_4V, VEX_L; + defm V#NAME#Y : PDI_binop_rm_int, VEX_4V, VEX_L; } multiclass PDI_binop_rmi opc, bits<8> opc2, Format ImmForm, @@ -3697,62 +3697,62 @@ multiclass PDI_binop_rm2 opc, string OpcodeStr, SDNode OpNode, } } // ExeDomain = SSEPackedInt -defm ADDB : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8, - SSE_INTALU_ITINS_P, 1>; -defm ADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16, - SSE_INTALU_ITINS_P, 1>; -defm ADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32, - SSE_INTALU_ITINS_P, 1>; -defm ADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64, - SSE_INTALUQ_ITINS_P, 1>; -defm MULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16, - SSE_INTMUL_ITINS_P, 1>; -defm SUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8, - SSE_INTALU_ITINS_P, 0>; -defm SUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16, - SSE_INTALU_ITINS_P, 0>; -defm SUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32, - SSE_INTALU_ITINS_P, 0>; -defm SUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64, - SSE_INTALUQ_ITINS_P, 0>; -defm SUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8, - SSE_INTALU_ITINS_P, 0>; -defm SUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16, - SSE_INTALU_ITINS_P, 0>; -defm MINUB : PDI_binop_all<0xDA, "pminub", X86umin, v16i8, v32i8, - SSE_INTALU_ITINS_P, 1>; -defm MINSW : PDI_binop_all<0xEA, "pminsw", X86smin, v8i16, v16i16, - SSE_INTALU_ITINS_P, 1>; -defm MAXUB : PDI_binop_all<0xDE, "pmaxub", X86umax, v16i8, v32i8, - SSE_INTALU_ITINS_P, 1>; -defm MAXSW : PDI_binop_all<0xEE, "pmaxsw", X86smax, v8i16, v16i16, - SSE_INTALU_ITINS_P, 1>; +defm PADDB : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8, + SSE_INTALU_ITINS_P, 1>; +defm PADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16, + SSE_INTALU_ITINS_P, 1>; +defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32, + SSE_INTALU_ITINS_P, 1>; +defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64, + SSE_INTALUQ_ITINS_P, 1>; +defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16, + SSE_INTMUL_ITINS_P, 1>; +defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8, + SSE_INTALU_ITINS_P, 0>; +defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16, + SSE_INTALU_ITINS_P, 0>; +defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32, + SSE_INTALU_ITINS_P, 0>; +defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64, + SSE_INTALUQ_ITINS_P, 0>; +defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8, + SSE_INTALU_ITINS_P, 0>; +defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16, + SSE_INTALU_ITINS_P, 0>; +defm PMINUB : PDI_binop_all<0xDA, "pminub", X86umin, v16i8, v32i8, + SSE_INTALU_ITINS_P, 1>; +defm PMINSW : PDI_binop_all<0xEA, "pminsw", X86smin, v8i16, v16i16, + SSE_INTALU_ITINS_P, 1>; +defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", X86umax, v16i8, v32i8, + SSE_INTALU_ITINS_P, 1>; +defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", X86smax, v8i16, v16i16, + SSE_INTALU_ITINS_P, 1>; // Intrinsic forms -defm SUBSB : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b, - int_x86_avx2_psubs_b, SSE_INTALU_ITINS_P, 0>; -defm SUBSW : PDI_binop_all_int<0xE9, "psubsw" , int_x86_sse2_psubs_w, - int_x86_avx2_psubs_w, SSE_INTALU_ITINS_P, 0>; -defm ADDSB : PDI_binop_all_int<0xEC, "paddsb" , int_x86_sse2_padds_b, - int_x86_avx2_padds_b, SSE_INTALU_ITINS_P, 1>; -defm ADDSW : PDI_binop_all_int<0xED, "paddsw" , int_x86_sse2_padds_w, - int_x86_avx2_padds_w, SSE_INTALU_ITINS_P, 1>; -defm ADDUSB : 
PDI_binop_all_int<0xDC, "paddusb", int_x86_sse2_paddus_b, - int_x86_avx2_paddus_b, SSE_INTALU_ITINS_P, 1>; -defm ADDUSW : PDI_binop_all_int<0xDD, "paddusw", int_x86_sse2_paddus_w, - int_x86_avx2_paddus_w, SSE_INTALU_ITINS_P, 1>; -defm MULHUW : PDI_binop_all_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, - int_x86_avx2_pmulhu_w, SSE_INTMUL_ITINS_P, 1>; -defm MULHW : PDI_binop_all_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, - int_x86_avx2_pmulh_w, SSE_INTMUL_ITINS_P, 1>; -defm MADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, - int_x86_avx2_pmadd_wd, SSE_PMADD, 1>; -defm AVGB : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b, - int_x86_avx2_pavg_b, SSE_INTALU_ITINS_P, 1>; -defm AVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w, - int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>; -defm SADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw, - int_x86_avx2_psad_bw, SSE_INTALU_ITINS_P, 1>; +defm PSUBSB : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b, + int_x86_avx2_psubs_b, SSE_INTALU_ITINS_P, 0>; +defm PSUBSW : PDI_binop_all_int<0xE9, "psubsw" , int_x86_sse2_psubs_w, + int_x86_avx2_psubs_w, SSE_INTALU_ITINS_P, 0>; +defm PADDSB : PDI_binop_all_int<0xEC, "paddsb" , int_x86_sse2_padds_b, + int_x86_avx2_padds_b, SSE_INTALU_ITINS_P, 1>; +defm PADDSW : PDI_binop_all_int<0xED, "paddsw" , int_x86_sse2_padds_w, + int_x86_avx2_padds_w, SSE_INTALU_ITINS_P, 1>; +defm PADDUSB : PDI_binop_all_int<0xDC, "paddusb", int_x86_sse2_paddus_b, + int_x86_avx2_paddus_b, SSE_INTALU_ITINS_P, 1>; +defm PADDUSW : PDI_binop_all_int<0xDD, "paddusw", int_x86_sse2_paddus_w, + int_x86_avx2_paddus_w, SSE_INTALU_ITINS_P, 1>; +defm PMULHUW : PDI_binop_all_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, + int_x86_avx2_pmulhu_w, SSE_INTMUL_ITINS_P, 1>; +defm PMULHW : PDI_binop_all_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, + int_x86_avx2_pmulh_w, SSE_INTMUL_ITINS_P, 1>; +defm PMADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, + int_x86_avx2_pmadd_wd, SSE_PMADD, 1>; +defm PAVGB : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b, + int_x86_avx2_pavg_b, SSE_INTALU_ITINS_P, 1>; +defm PAVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w, + int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>; +defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw, + int_x86_avx2_psad_bw, SSE_INTALU_ITINS_P, 1>; let Predicates = [HasAVX] in defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128, @@ -3947,30 +3947,29 @@ let Predicates = [UseSSE2] in { // SSE2 - Packed Integer Comparison Instructions //===---------------------------------------------------------------------===// -defm CMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8, - SSE_INTALU_ITINS_P, 1>; -defm CMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16, - SSE_INTALU_ITINS_P, 1>; -defm CMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32, - SSE_INTALU_ITINS_P, 1>; -defm CMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8, - SSE_INTALU_ITINS_P, 0>; -defm CMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16, - SSE_INTALU_ITINS_P, 0>; -defm CMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32, - SSE_INTALU_ITINS_P, 0>; +defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8, + SSE_INTALU_ITINS_P, 1>; +defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16, + SSE_INTALU_ITINS_P, 1>; +defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32, + SSE_INTALU_ITINS_P, 1>; +defm PCMPGTB : 
PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8, + SSE_INTALU_ITINS_P, 0>; +defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16, + SSE_INTALU_ITINS_P, 0>; +defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32, + SSE_INTALU_ITINS_P, 0>; //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Pack Instructions //===---------------------------------------------------------------------===// -// FIXME: Names are bad due to the need to have a 'P' prefix in the multiclass. -defm ACKSSWB : PDI_binop_all_int<0x63, "packsswb", int_x86_sse2_packsswb_128, - int_x86_avx2_packsswb, SSE_INTALU_ITINS_P, 0>; -defm ACKSSDW : PDI_binop_all_int<0x6B, "packssdw", int_x86_sse2_packssdw_128, - int_x86_avx2_packssdw, SSE_INTALU_ITINS_P, 0>; -defm ACKUSWB : PDI_binop_all_int<0x67, "packuswb", int_x86_sse2_packuswb_128, - int_x86_avx2_packuswb, SSE_INTALU_ITINS_P, 0>; +defm PACKSSWB : PDI_binop_all_int<0x63, "packsswb", int_x86_sse2_packsswb_128, + int_x86_avx2_packsswb, SSE_INTALU_ITINS_P, 0>; +defm PACKSSDW : PDI_binop_all_int<0x6B, "packssdw", int_x86_sse2_packssdw_128, + int_x86_avx2_packssdw, SSE_INTALU_ITINS_P, 0>; +defm PACKUSWB : PDI_binop_all_int<0x67, "packuswb", int_x86_sse2_packuswb_128, + int_x86_avx2_packuswb, SSE_INTALU_ITINS_P, 0>; //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Shuffle Instructions -- cgit v1.2.3-18-g5258 From 068aec586de5d7739da023a741ed07b4efa6db73 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 27 Dec 2012 07:16:08 +0000 Subject: Add hasSideEffects=0 to some forms of ROUND, RCP, and RSQRT. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171143 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 9c9628916e..672a43a7f1 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3289,6 +3289,7 @@ multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), [], itins.rr>; + let mayLoad = 1, hasSideEffects = 0 in def SSm_Int : SSI opcss, bits<8> opcsd, Intrinsic F64Int, bit Is2Addr = 1> { let ExeDomain = GenericDomain in { // Operation, reg. + let hasSideEffects = 0 in def SSr : SS4AIi8 Date: Thu, 27 Dec 2012 07:45:10 +0000 Subject: AVX/AVX2: Move the code that lowers vector-trunc from a DAGCombine hook to a custom lowering hook. The vector truncs were scalarized during LegalizeVectorOps, later vectorized again by some DAGCombine optimization and finally lowered by a DAGCombine optimization. Now, they are properly lowered during LegalizeVectorOps. No new testcase because the original testcases still work.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171146 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 202 ++++++++++++++++++------------------- 1 file changed, 96 insertions(+), 106 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 31e69514fb..7016b4465d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1088,6 +1088,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FABS, MVT::v4f64, Custom); setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom); @@ -8315,19 +8316,109 @@ SDValue X86TargetLowering::lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); EVT VT = Op.getValueType(); - EVT SVT = Op.getOperand(0).getValueType(); + SDValue In = Op.getOperand(0); + EVT SVT = In.getValueType(); - if (!VT.is128BitVector() || !SVT.is256BitVector() || - VT.getVectorNumElements() != SVT.getVectorNumElements()) + if ((VT == MVT::v4i32) && (SVT == MVT::v4i64)) { + // On AVX2, v4i64 -> v4i32 becomes VPERMD. + if (Subtarget->hasInt256()) { + static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1}; + In = DAG.getNode(ISD::BITCAST, DL, MVT::v8i32, In); + In = DAG.getVectorShuffle(MVT::v8i32, DL, In, DAG.getUNDEF(MVT::v8i32), + ShufMask); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In, + DAG.getIntPtrConstant(0)); + } + + // On AVX, v4i64 -> v4i32 becomes a sequence that uses PSHUFD and MOVLHPS. + SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, + DAG.getIntPtrConstant(0)); + SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, + DAG.getIntPtrConstant(2)); + + OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo); + OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi); + + // The PSHUFD mask: + static const int ShufMask1[] = {0, 2, 0, 0}; + SDValue Undef = DAG.getUNDEF(VT); + OpLo = DAG.getVectorShuffle(VT, DL, OpLo, Undef, ShufMask1); + OpHi = DAG.getVectorShuffle(VT, DL, OpHi, Undef, ShufMask1); + + // The MOVLHPS mask: + static const int ShufMask2[] = {0, 1, 4, 5}; + return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask2); + } + + if ((VT == MVT::v8i16) && (SVT == MVT::v8i32)) { + // On AVX2, v8i32 -> v8i16 becomed PSHUFB. 
+ if (Subtarget->hasInt256()) { + In = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, In); + + SmallVector pshufbMask; + for (unsigned i = 0; i < 2; ++i) { + pshufbMask.push_back(DAG.getConstant(0x0, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x1, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x4, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x5, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x8, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x9, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0xc, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0xd, MVT::i8)); + for (unsigned j = 0; j < 8; ++j) + pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8)); + } + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, + &pshufbMask[0], 32); + In = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, In, BV); + In = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, In); + + static const int ShufMask[] = {0, 2, -1, -1}; + In = DAG.getVectorShuffle(MVT::v4i64, DL, In, DAG.getUNDEF(MVT::v4i64), + &ShufMask[0]); + In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, + DAG.getIntPtrConstant(0)); + return DAG.getNode(ISD::BITCAST, DL, VT, In); + } + + SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In, + DAG.getIntPtrConstant(0)); + + SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In, + DAG.getIntPtrConstant(4)); + + OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpLo); + OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpHi); + + // The PSHUFB mask: + static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13, + -1, -1, -1, -1, -1, -1, -1, -1}; + + SDValue Undef = DAG.getUNDEF(MVT::v16i8); + OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, Undef, ShufMask1); + OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, Undef, ShufMask1); + + OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo); + OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi); + + // The MOVLHPS Mask: + static const int ShufMask2[] = {0, 1, 4, 5}; + SDValue res = DAG.getVectorShuffle(MVT::v4i32, DL, OpLo, OpHi, ShufMask2); + return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, res); + } + + // Handle truncation of V256 to V128 using shuffles. + if (!VT.is128BitVector() || !SVT.is256BitVector()) return SDValue(); - assert(Subtarget->hasFp256() && "256-bit vector is observed without AVX!"); + assert(VT.getVectorNumElements() != SVT.getVectorNumElements() && + "Invalid op"); + assert(Subtarget->hasFp256() && "256-bit vector without AVX!"); unsigned NumElems = VT.getVectorNumElements(); EVT NVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElems * 2); - SDValue In = Op.getOperand(0); SmallVector MaskVec(NumElems * 2, -1); // Prepare truncation shuffle mask for (unsigned i = 0; i != NumElems; ++i) @@ -14395,107 +14486,6 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { - if (!DCI.isBeforeLegalizeOps()) - return SDValue(); - - if (!Subtarget->hasFp256()) - return SDValue(); - - EVT VT = N->getValueType(0); - SDValue Op = N->getOperand(0); - EVT OpVT = Op.getValueType(); - DebugLoc dl = N->getDebugLoc(); - - if ((VT == MVT::v4i32) && (OpVT == MVT::v4i64)) { - - // On AVX2, v4i64 -> v4i32 becomes VPERMD. 
- if (Subtarget->hasInt256()) { - static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1}; - Op = DAG.getNode(ISD::BITCAST, dl, MVT::v8i32, Op); - Op = DAG.getVectorShuffle(MVT::v8i32, dl, Op, DAG.getUNDEF(MVT::v8i32), - ShufMask); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Op, - DAG.getIntPtrConstant(0)); - } - - // On AVX, v4i64 -> v4i32 becomes a sequence that uses PSHUFD and MOVLHPS. - SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op, - DAG.getIntPtrConstant(0)); - SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op, - DAG.getIntPtrConstant(2)); - - OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo); - OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi); - - // The PSHUFD mask: - static const int ShufMask1[] = {0, 2, 0, 0}; - SDValue Undef = DAG.getUNDEF(VT); - OpLo = DAG.getVectorShuffle(VT, dl, OpLo, Undef, ShufMask1); - OpHi = DAG.getVectorShuffle(VT, dl, OpHi, Undef, ShufMask1); - - // The MOVLHPS mask: - static const int ShufMask2[] = {0, 1, 4, 5}; - return DAG.getVectorShuffle(VT, dl, OpLo, OpHi, ShufMask2); - } - - if ((VT == MVT::v8i16) && (OpVT == MVT::v8i32)) { - - // On AVX2, v8i32 -> v8i16 becomed PSHUFB. - if (Subtarget->hasInt256()) { - Op = DAG.getNode(ISD::BITCAST, dl, MVT::v32i8, Op); - - SmallVector pshufbMask; - for (unsigned i = 0; i < 2; ++i) { - pshufbMask.push_back(DAG.getConstant(0x0, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0x1, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0x4, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0x5, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0x8, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0x9, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0xc, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0xd, MVT::i8)); - for (unsigned j = 0; j < 8; ++j) - pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8)); - } - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v32i8, - &pshufbMask[0], 32); - Op = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v32i8, Op, BV); - Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i64, Op); - - static const int ShufMask[] = {0, 2, -1, -1}; - Op = DAG.getVectorShuffle(MVT::v4i64, dl, Op, DAG.getUNDEF(MVT::v4i64), - &ShufMask[0]); - Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op, - DAG.getIntPtrConstant(0)); - return DAG.getNode(ISD::BITCAST, dl, VT, Op); - } - - SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op, - DAG.getIntPtrConstant(0)); - - SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op, - DAG.getIntPtrConstant(4)); - - OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLo); - OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpHi); - - // The PSHUFB mask: - static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13, - -1, -1, -1, -1, -1, -1, -1, -1}; - - SDValue Undef = DAG.getUNDEF(MVT::v16i8); - OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo, Undef, ShufMask1); - OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi, Undef, ShufMask1); - - OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo); - OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi); - - // The MOVLHPS Mask: - static const int ShufMask2[] = {0, 1, 4, 5}; - SDValue res = DAG.getVectorShuffle(MVT::v4i32, dl, OpLo, OpHi, ShufMask2); - return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, res); - } - return SDValue(); } -- cgit v1.2.3-18-g5258 From d6fb53adb19ccfbfb1eedec11c899aaa8401d036 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Thu, 27 Dec 2012 08:15:45 +0000 Subject: On AVX/AVX2 the type v8i1 is legalized to 
v8i16, which is an XMM sized register. In most cases we actually compare or select YMM-sized registers and mixing the two types creates horrible code. This commit optimizes some of the transition sequences. PR14657. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171148 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 121 ++++++++++++++++++++++++++++++++----- 1 file changed, 106 insertions(+), 15 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 7016b4465d..d3c21bd703 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -15731,9 +15731,92 @@ static bool CanFoldXORWithAllOnes(const SDNode *N) { return false; } +// On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM sized +// register. In most cases we actually compare or select YMM-sized registers +// and mixing the two types creates horrible code. This method optimizes +// some of the transition sequences. +static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + if (VT.getSizeInBits() != 256) + return SDValue(); + + assert((N->getOpcode() == ISD::ANY_EXTEND || + N->getOpcode() == ISD::ZERO_EXTEND || + N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node"); + + SDValue Narrow = N->getOperand(0); + EVT NarrowVT = Narrow->getValueType(0); + if (NarrowVT.getSizeInBits() != 128) + return SDValue(); + + if (Narrow->getOpcode() != ISD::XOR && + Narrow->getOpcode() != ISD::AND && + Narrow->getOpcode() != ISD::OR) + return SDValue(); + + SDValue N0 = Narrow->getOperand(0); + SDValue N1 = Narrow->getOperand(1); + DebugLoc DL = Narrow->getDebugLoc(); + + // The Left side has to be a trunc. + if (N0.getOpcode() != ISD::TRUNCATE) + return SDValue(); + + // The type of the truncated inputs. + EVT WideVT = N0->getOperand(0)->getValueType(0); + if (WideVT != VT) + return SDValue(); + + // The right side has to be a 'trunc' or a constant vector. + bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE; + bool RHSConst = (isSplatVector(N1.getNode()) && + isa(N1->getOperand(0))); + if (!RHSTrunc && !RHSConst) + return SDValue(); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + if (!TLI.isOperationLegalOrPromote(Narrow->getOpcode(), WideVT)) + return SDValue(); + + // Set N0 and N1 to hold the inputs to the new wide operation. + N0 = N0->getOperand(0); + if (RHSConst) { + N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(), + N1->getOperand(0)); + SmallVector C(WideVT.getVectorNumElements(), N1); + N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, &C[0], C.size()); + } else if (RHSTrunc) { + N1 = N1->getOperand(0); + } + + // Generate the wide operation. 
+ SDValue Op = DAG.getNode(N->getOpcode(), DL, WideVT, N0, N1); + unsigned Opcode = N->getOpcode(); + switch (Opcode) { + case ISD::ANY_EXTEND: + return Op; + case ISD::ZERO_EXTEND: { + unsigned InBits = NarrowVT.getScalarType().getSizeInBits(); + APInt Mask = APInt::getAllOnesValue(InBits); + Mask = Mask.zext(VT.getScalarType().getSizeInBits()); + return DAG.getNode(ISD::AND, DL, VT, + Op, DAG.getConstant(Mask, VT)); + } + case ISD::SIGN_EXTEND: + return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, + Op, DAG.getValueType(NarrowVT)); + default: + llvm_unreachable("Unexpected opcode"); + } +} + static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -15741,8 +15824,6 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, if (R.getNode()) return R; - EVT VT = N->getValueType(0); - // Create BLSI, and BLSR instructions // BLSI is X & (-X) // BLSR is X & (X-1) @@ -15803,6 +15884,7 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -15810,8 +15892,6 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, if (R.getNode()) return R; - EVT VT = N->getValueType(0); - SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -15991,6 +16071,7 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) { static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -16004,8 +16085,6 @@ static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG, if (!Subtarget->hasBMI()) return SDValue(); - EVT VT = N->getValueType(0); - if (VT != MVT::i32 && VT != MVT::i64) return SDValue(); @@ -16671,6 +16750,12 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, EVT OpVT = Op.getValueType(); DebugLoc dl = N->getDebugLoc(); + if (VT.isVector() && VT.getSizeInBits() == 256) { + SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget); + if (R.getNode()) + return R; + } + if ((VT == MVT::v4i64 && OpVT == MVT::v4i32) || (VT == MVT::v8i32 && OpVT == MVT::v8i16)) { @@ -16768,15 +16853,21 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, N0.hasOneUse() && N0.getOperand(0).hasOneUse()) { SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() != X86ISD::SETCC_CARRY) - return SDValue(); - ConstantSDNode *C = dyn_cast(N0.getOperand(1)); - if (!C || C->getZExtValue() != 1) - return SDValue(); - return DAG.getNode(ISD::AND, dl, VT, - DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, - N00.getOperand(0), N00.getOperand(1)), - DAG.getConstant(1, VT)); + if (N00.getOpcode() == X86ISD::SETCC_CARRY) { + ConstantSDNode *C = dyn_cast(N0.getOperand(1)); + if (!C || C->getZExtValue() != 1) + return SDValue(); + return DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, + N00.getOperand(0), N00.getOperand(1)), + DAG.getConstant(1, VT)); + } + } + + if (VT.isVector() && VT.getSizeInBits() == 256) { + SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget); + if (R.getNode()) + return R; } // Optimize vectors in AVX mode: -- cgit v1.2.3-18-g5258 From d5fc507ff117bcfde44a0b9fb7180d518527a3e0 Mon Sep 17 00:00:00 2001 From: 
Craig Topper Date: Thu, 27 Dec 2012 18:51:50 +0000 Subject: Merge basic_sse12_fp_binop_p and basic_sse12_fp_binop_p_y multiclasses. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171166 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 90 ++++++++++++++++++++----------------------- 1 file changed, 41 insertions(+), 49 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 672a43a7f1..44846fe6d0 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2825,26 +2825,32 @@ multiclass basic_sse12_fp_binop_s opc, string OpcodeStr, SDNode OpNode, itins.d, Is2Addr>, XD; } -multiclass basic_sse12_fp_binop_p opc, string OpcodeStr, SDNode OpNode, - SizeItins itins, - bit Is2Addr = 1> { +multiclass basic_sse12_fp_binop_p opc, string OpcodeStr, + SDNode OpNode, SizeItins itins> { +let Predicates = [HasAVX] in { + defm V#NAME#PS : sse12_fp_packed, TB, VEX_4V; + defm V#NAME#PD : sse12_fp_packed, TB, OpSize, VEX_4V; + + defm V#NAME#PSY : sse12_fp_packed, TB, VEX_4V, VEX_L; + defm V#NAME#PDY : sse12_fp_packed, TB, OpSize, VEX_4V, VEX_L; +} + +let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed, - TB; + v4f32, f128mem, memopv4f32, SSEPackedSingle, + itins.s, 1>, TB; defm PD : sse12_fp_packed, - TB, OpSize; + v2f64, f128mem, memopv2f64, SSEPackedDouble, + itins.d, 1>, TB, OpSize; } - -multiclass basic_sse12_fp_binop_p_y opc, string OpcodeStr, - SDNode OpNode, - SizeItins itins> { - defm PSY : sse12_fp_packed, - TB, VEX_L; - defm PDY : sse12_fp_packed, - TB, OpSize, VEX_L; } multiclass basic_sse12_fp_binop_s_int opc, string OpcodeStr, @@ -2884,71 +2890,63 @@ multiclass basic_sse12_fp_binop_p_y_int opc, string OpcodeStr, } // Binary Arithmetic instructions +defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>; +defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>; +let isCommutable = 0 in { + defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>; + defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>; + defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>; + defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>; +} + +let isCodeGenOnly = 1 in { + defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>; + defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>; +} + defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S, 0>, basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S, 0>, VEX_4V, VEX_LIG; -defm VADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_y<0x58, "add", fadd, SSE_ALU_ITINS_P>, - VEX_4V; defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S, 0>, basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S, 0>, VEX_4V, VEX_LIG; -defm VMUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P, 0>, - basic_sse12_fp_binop_p_y<0x59, "mul", fmul, SSE_MUL_ITINS_P>, - VEX_4V; let isCommutable = 0 in { defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S, 0>, basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>, VEX_4V, VEX_LIG; - defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, - VEX_4V; defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>, basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>, VEX_4V, VEX_LIG; 
- defm VDIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_y<0x5E, "div", fdiv, SSE_DIV_ITINS_P>, - VEX_4V; defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S, 0>, basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S, 0>, VEX_4V, VEX_LIG; - defm VMAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>, + defm VMAX : basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P, 0>, basic_sse12_fp_binop_p_y_int<0x5F, "max", SSE_ALU_ITINS_P>, VEX_4V; defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S, 0>, basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S, 0>, VEX_4V, VEX_LIG; - defm VMIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P, 0>, + defm VMIN : basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P, 0>, basic_sse12_fp_binop_p_y_int<0x5D, "min", SSE_ALU_ITINS_P>, - basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>, VEX_4V; } let Constraints = "$src1 = $dst" in { defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>, basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>; defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>, - basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>, basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>; let isCommutable = 0 in { defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>; defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>, - basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>, basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>; defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>, basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>, basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P>; defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>, basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>, basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P>; } @@ -2957,17 +2955,11 @@ let Constraints = "$src1 = $dst" in { let isCodeGenOnly = 1 in { defm VMAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S, 0>, VEX_4V, VEX_LIG; - defm VMAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_y<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>, VEX_4V; defm VMINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S, 0>, VEX_4V, VEX_LIG; - defm VMINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_y<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>, VEX_4V; let Constraints = "$src1 = $dst" in { - defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>; - defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>; + defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>; + defm MINC: basic_sse12_fp_binop_s<0x5D, 
"min", X86fminc, SSE_ALU_ITINS_S>; } } -- cgit v1.2.3-18-g5258 From 1a330af3b54aada0b2028cf6793d90c9e2974567 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Thu, 27 Dec 2012 22:47:16 +0000 Subject: AVX/AVX2: Move the SEXT lowering code from a target specific DAGco to a lowering function. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171170 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 94 +++++++++++++++++++++----------------- lib/Target/X86/X86ISelLowering.h | 1 + 2 files changed, 53 insertions(+), 42 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index d3c21bd703..b6efc28316 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1125,6 +1125,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSELECT, MVT::v8i32, Legal); setOperationAction(ISD::VSELECT, MVT::v8f32, Legal); + setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); + if (Subtarget->hasFMA() || Subtarget->hasFMA4()) { setOperationAction(ISD::FMA, MVT::v8f32, Legal); setOperationAction(ISD::FMA, MVT::v4f64, Legal); @@ -9539,6 +9542,54 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops)); } +SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op->getValueType(0); + SDValue In = Op->getOperand(0); + EVT InVT = In.getValueType(); + DebugLoc dl = Op->getDebugLoc(); + + if ((VT == MVT::v4i64 && InVT == MVT::v4i32) || + (VT == MVT::v8i32 && InVT == MVT::v8i16)) { + + if (Subtarget->hasInt256()) + return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, In); + + // Optimize vectors in AVX mode + // Sign extend v8i16 to v8i32 and + // v4i32 to v4i64 + // + // Divide input vector into two parts + // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1} + // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32 + // concat the vectors to original VT + + unsigned NumElems = InVT.getVectorNumElements(); + SDValue Undef = DAG.getUNDEF(InVT); + + SmallVector ShufMask1(NumElems, -1); + for (unsigned i = 0; i != NumElems/2; ++i) + ShufMask1[i] = i; + + SDValue OpLo = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask1[0]); + + SmallVector ShufMask2(NumElems, -1); + for (unsigned i = 0; i != NumElems/2; ++i) + ShufMask2[i] = i + NumElems/2; + + SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask2[0]); + + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), + VT.getVectorNumElements()/2); + + OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo); + OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); + } + return SDValue(); +} + // isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or // ISD::OR of two X86ISD::SETCC nodes each of which has no other use apart // from the AND / OR. 
@@ -11809,6 +11860,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); + case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); @@ -16746,54 +16798,12 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); EVT VT = N->getValueType(0); - SDValue Op = N->getOperand(0); - EVT OpVT = Op.getValueType(); - DebugLoc dl = N->getDebugLoc(); - if (VT.isVector() && VT.getSizeInBits() == 256) { SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget); if (R.getNode()) return R; } - if ((VT == MVT::v4i64 && OpVT == MVT::v4i32) || - (VT == MVT::v8i32 && OpVT == MVT::v8i16)) { - - if (Subtarget->hasInt256()) - return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, Op); - - // Optimize vectors in AVX mode - // Sign extend v8i16 to v8i32 and - // v4i32 to v4i64 - // - // Divide input vector into two parts - // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1} - // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32 - // concat the vectors to original VT - - unsigned NumElems = OpVT.getVectorNumElements(); - SDValue Undef = DAG.getUNDEF(OpVT); - - SmallVector ShufMask1(NumElems, -1); - for (unsigned i = 0; i != NumElems/2; ++i) - ShufMask1[i] = i; - - SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask1[0]); - - SmallVector ShufMask2(NumElems, -1); - for (unsigned i = 0; i != NumElems/2; ++i) - ShufMask2[i] = i + NumElems/2; - - SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask2[0]); - - EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), - VT.getVectorNumElements()/2); - - OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo); - OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi); - - return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); - } return SDValue(); } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index eec1f7e720..a19e7ec1a1 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -824,6 +824,7 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const; -- cgit v1.2.3-18-g5258 From cccccabd070c803a5822f872803fa2883895e8dd Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 27 Dec 2012 22:53:47 +0000 Subject: Merge basic_sse12_fp_binop_p_int and basic_sse12_fp_binop_p_y_int multiclasses. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171171 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 56 +++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 28 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 44846fe6d0..0c7d23f143 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2865,28 +2865,34 @@ multiclass basic_sse12_fp_binop_s_int opc, string OpcodeStr, } multiclass basic_sse12_fp_binop_p_int opc, string OpcodeStr, - SizeItins itins, - bit Is2Addr = 1> { - defm PS : sse12_fp_packed_int, - TB; + SizeItins itins> { +let Predicates = [HasAVX] in { + defm V#NAME#PS : sse12_fp_packed_int, TB, VEX_4V; - defm PD : sse12_fp_packed_int, - TB, OpSize; + defm V#NAME#PD : sse12_fp_packed_int, TB, OpSize, VEX_4V; + + defm V#NAME#PSY : sse12_fp_packed_int, TB, VEX_4V, VEX_L; + + defm V#NAME#PDY : sse12_fp_packed_int, TB, OpSize, VEX_4V, VEX_L; } -multiclass basic_sse12_fp_binop_p_y_int opc, string OpcodeStr, - SizeItins itins> { - defm PSY : sse12_fp_packed_int, TB, VEX_L; +let Constraints = "$src1 = $dst" in { + defm PS : sse12_fp_packed_int, TB; - defm PDY : sse12_fp_packed_int, TB, OpSize, VEX_L; + defm PD : sse12_fp_packed_int, TB, OpSize; +} } // Binary Arithmetic instructions @@ -2896,7 +2902,9 @@ let isCommutable = 0 in { defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>; defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>; defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>; + defm MAX : basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P>; defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>; + defm MIN : basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P>; } let isCodeGenOnly = 1 in { @@ -2921,15 +2929,9 @@ let isCommutable = 0 in { defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S, 0>, basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S, 0>, VEX_4V, VEX_LIG; - defm VMAX : basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_y_int<0x5F, "max", SSE_ALU_ITINS_P>, - VEX_4V; defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S, 0>, basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S, 0>, VEX_4V, VEX_LIG; - defm VMIN : basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_y_int<0x5D, "min", SSE_ALU_ITINS_P>, - VEX_4V; } let Constraints = "$src1 = $dst" in { @@ -2944,11 +2946,9 @@ let Constraints = "$src1 = $dst" in { defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>, basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>; defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P>; + basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>; defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P>; + basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>; } } -- cgit v1.2.3-18-g5258 From 587fb1dd30b73afb3c83a1e88d9ea101a0b28ab2 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Thu, 27 Dec 2012 23:08:05 +0000 Subject: Reverse the 'if' condition and reduce the indentation. 
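For illustration, the guard-clause style being applied (a generic sketch, not
code from this commit; WideVT, NarrowVT, and Result are placeholder names):

    // Before: the interesting path is nested inside the condition.
    if (VT == WideVT && InVT == NarrowVT) {
      // ... long lowering sequence ...
      return Result;
    }
    return SDValue();

    // After: reverse the condition and return early, so the long
    // sequence sits at the top indentation level.
    if (VT != WideVT || InVT != NarrowVT)
      return SDValue();
    // ... long lowering sequence ...
    return Result;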
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171172 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 57 +++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 29 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b6efc28316..263a5d668a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9549,45 +9549,44 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op, EVT InVT = In.getValueType(); DebugLoc dl = Op->getDebugLoc(); - if ((VT == MVT::v4i64 && InVT == MVT::v4i32) || - (VT == MVT::v8i32 && InVT == MVT::v8i16)) { + if ((VT != MVT::v4i64 || InVT != MVT::v4i32) && + (VT != MVT::v8i32 || InVT != MVT::v8i16)) + return SDValue(); - if (Subtarget->hasInt256()) - return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, In); + if (Subtarget->hasInt256()) + return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, In); - // Optimize vectors in AVX mode - // Sign extend v8i16 to v8i32 and - // v4i32 to v4i64 - // - // Divide input vector into two parts - // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1} - // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32 - // concat the vectors to original VT + // Optimize vectors in AVX mode + // Sign extend v8i16 to v8i32 and + // v4i32 to v4i64 + // + // Divide input vector into two parts + // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1} + // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32 + // concat the vectors to original VT - unsigned NumElems = InVT.getVectorNumElements(); - SDValue Undef = DAG.getUNDEF(InVT); + unsigned NumElems = InVT.getVectorNumElements(); + SDValue Undef = DAG.getUNDEF(InVT); - SmallVector ShufMask1(NumElems, -1); - for (unsigned i = 0; i != NumElems/2; ++i) - ShufMask1[i] = i; + SmallVector ShufMask1(NumElems, -1); + for (unsigned i = 0; i != NumElems/2; ++i) + ShufMask1[i] = i; - SDValue OpLo = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask1[0]); + SDValue OpLo = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask1[0]); - SmallVector ShufMask2(NumElems, -1); - for (unsigned i = 0; i != NumElems/2; ++i) - ShufMask2[i] = i + NumElems/2; + SmallVector ShufMask2(NumElems, -1); + for (unsigned i = 0; i != NumElems/2; ++i) + ShufMask2[i] = i + NumElems/2; - SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask2[0]); + SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask2[0]); - EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), - VT.getVectorNumElements()/2); + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), + VT.getVectorNumElements()/2); - OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo); - OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi); + OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo); + OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi); - return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); - } - return SDValue(); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); } // isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or -- cgit v1.2.3-18-g5258 From 0509db27386f5cafffd364618365ecda741cf0bd Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 28 Dec 2012 05:45:24 +0000 Subject: AVX: Move the ZEXT/ANYEXT DAGCo optimizations to the lowering of these optimizations. The old test cases still cover all of these lowering/optimizations. 
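Sketch of the shared LowerAVXExtend pattern this introduces (condensed from
the change below, reusing the getUnpackl/getUnpackh helpers it calls; HalfVT
stands for the destination type with half as many elements):

    // v8i16 -> v8i32 (and v4i32 -> v4i64) on AVX without AVX2: interleave
    // the input with zeros (vpunpcklwd/vpunpckhwd), reinterpret each half
    // as the wider element type, then concatenate. For ANY_EXTEND the
    // interleave partner is undef instead of a zero vector.
    SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl);
    SDValue Undef   = DAG.getUNDEF(InVT);
    bool NeedZero   = Op.getOpcode() == ISD::ZERO_EXTEND;
    SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
    SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
    OpLo = DAG.getNode(ISD::BITCAST, dl, HalfVT, OpLo);
    OpHi = DAG.getNode(ISD::BITCAST, dl, HalfVT, OpHi);
    return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);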
The single change that we have is that now anyext does not need to zero a register, because it does not use the exact code path as the zero_extend. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171178 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 111 +++++++++++++++++++++++-------------- lib/Target/X86/X86ISelLowering.h | 5 +- 2 files changed, 71 insertions(+), 45 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 263a5d668a..ca86f660b2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1125,8 +1125,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSELECT, MVT::v8i32, Legal); setOperationAction(ISD::VSELECT, MVT::v8f32, Legal); - setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom); if (Subtarget->hasFMA() || Subtarget->hasFMA4()) { setOperationAction(ISD::FMA, MVT::v8f32, Legal); @@ -8292,12 +8296,70 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) co } } -SDValue X86TargetLowering::lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + EVT VT = Op->getValueType(0); + SDValue In = Op->getOperand(0); + EVT InVT = In.getValueType(); + DebugLoc dl = Op->getDebugLoc(); + + // Optimize vectors in AVX mode: + // + // v8i16 -> v8i32 + // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32. + // Use vpunpckhwd for 4 upper elements v8i16 -> v4i32. + // Concat upper and lower parts. + // + // v4i32 -> v4i64 + // Use vpunpckldq for 4 lower elements v4i32 -> v2i64. + // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64. + // Concat upper and lower parts. + // + + if (((VT != MVT::v8i32) || (InVT != MVT::v8i16)) && + ((VT != MVT::v4i64) || (InVT != MVT::v4i32))) + return SDValue(); + + if (Subtarget->hasInt256()) + return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, In); + + SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl); + SDValue Undef = DAG.getUNDEF(InVT); + bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND; + SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef); + SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? 
ZeroVec : Undef); + + EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), + VT.getVectorNumElements()/2); + + OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo); + OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); +} + +SDValue X86TargetLowering::LowerANY_EXTEND(SDValue Op, + SelectionDAG &DAG) const { + if (Subtarget->hasFp256()) { + SDValue Res = LowerAVXExtend(Op, DAG, Subtarget); + if (Res.getNode()) + return Res; + } + + return SDValue(); +} +SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); EVT VT = Op.getValueType(); SDValue In = Op.getOperand(0); EVT SVT = In.getValueType(); + if (Subtarget->hasFp256()) { + SDValue Res = LowerAVXExtend(Op, DAG, Subtarget); + if (Res.getNode()) + return Res; + } + if (!VT.is256BitVector() || !SVT.is128BitVector() || VT.getVectorNumElements() != SVT.getVectorNumElements()) return SDValue(); @@ -11849,7 +11911,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); case ISD::TRUNCATE: return lowerTRUNCATE(Op, DAG); - case ISD::ZERO_EXTEND: return lowerZERO_EXTEND(Op, DAG); + case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG); + case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG); + case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG); case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG); @@ -11859,7 +11923,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); - case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); @@ -16856,7 +16919,6 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, DebugLoc dl = N->getDebugLoc(); SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - EVT OpVT = N0.getValueType(); if (N0.getOpcode() == ISD::AND && N0.hasOneUse() && @@ -16879,43 +16941,6 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, return R; } - // Optimize vectors in AVX mode: - // - // v8i16 -> v8i32 - // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32. - // Use vpunpckhwd for 4 upper elements v8i16 -> v4i32. - // Concat upper and lower parts. - // - // v4i32 -> v4i64 - // Use vpunpckldq for 4 lower elements v4i32 -> v2i64. - // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64. - // Concat upper and lower parts. 
- // - if (!DCI.isBeforeLegalizeOps()) - return SDValue(); - - if (!Subtarget->hasFp256()) - return SDValue(); - - if (((VT == MVT::v8i32) && (OpVT == MVT::v8i16)) || - ((VT == MVT::v4i64) && (OpVT == MVT::v4i32))) { - - if (Subtarget->hasInt256()) - return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, N0); - - SDValue ZeroVec = getZeroVector(OpVT, Subtarget, DAG, dl); - SDValue OpLo = getUnpackl(DAG, dl, OpVT, N0, ZeroVec); - SDValue OpHi = getUnpackh(DAG, dl, OpVT, N0, ZeroVec); - - EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), - VT.getVectorNumElements()/2); - - OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo); - OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi); - - return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); - } - return SDValue(); } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index a19e7ec1a1..1a696a8f53 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -813,7 +813,9 @@ namespace llvm { SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const; SDValue lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; @@ -824,7 +826,6 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const; -- cgit v1.2.3-18-g5258 From 40ef8b75487ad9f98a93b1dbf283a25658ef8a1e Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 28 Dec 2012 07:28:43 +0000 Subject: wrap 80-col lines. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171179 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ca86f660b2..e5122abd4b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -8348,7 +8348,8 @@ SDValue X86TargetLowering::LowerANY_EXTEND(SDValue Op,
   return SDValue();
 }
 
-SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op,
+                                            SelectionDAG &DAG) const {
   DebugLoc DL = Op.getDebugLoc();
   EVT VT = Op.getValueType();
   SDValue In = Op.getOperand(0);
@@ -8373,7 +8374,9 @@ SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const
     SDValue Lo = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32, In);
     static const int Mask[] = {4, 5, 6, 7, -1, -1, -1, -1};
     SDValue Hi = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32,
-                        DAG.getVectorShuffle(MVT::v8i16, DL, In, DAG.getUNDEF(MVT::v8i16), &Mask[0]));
+                             DAG.getVectorShuffle(MVT::v8i16, DL, In,
+                                                  DAG.getUNDEF(MVT::v8i16),
+                                                  &Mask[0]));
 
     return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Lo, Hi);
   }
-- cgit v1.2.3-18-g5258


From ae34b4280ebde6217706902e8a27bb858765a61c Mon Sep 17 00:00:00 2001
From: Nadav Rotem
Date: Fri, 28 Dec 2012 08:19:03 +0000
Subject: CostModel: initial checkin for code that estimates the cost of
 special shuffles.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171180 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 16 ++++++++++++++++
 lib/Target/X86/X86ISelLowering.h   |  2 ++
 2 files changed, 18 insertions(+)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index e5122abd4b..5d495a68ed 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -18237,3 +18237,19 @@ unsigned X86VectorTargetTransformInfo::getCastInstrCost(unsigned Opcode,
 
   return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src);
 }
+
+unsigned X86VectorTargetTransformInfo::getShuffleCost(ShuffleKind Kind,
                                                       Type *Tp,
+                                                      int Index) const {
+  // We only estimate the cost of reverse shuffles.
+  if (Kind != Reverse)
+    return VectorTargetTransformImpl::getShuffleCost(Kind, Tp, Index);
+
+  std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Tp);
+  unsigned Cost = 1;
+  if (LT.second.getSizeInBits() > 128)
+    Cost = 3; // Extract + insert + copy.
+
+  // Multiply by the number of parts.
+  return Cost * LT.first;
+}
+
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 1a696a8f53..1b4b5eb65d 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -973,6 +973,8 @@ namespace llvm {
     virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
                                       Type *Src) const;
+
+    unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index) const;
   };
 }
-- cgit v1.2.3-18-g5258


From c20323a7588bc5ad327155fe09c2b51435c3ada2 Mon Sep 17 00:00:00 2001
From: Jakub Staszak
Date: Sat, 29 Dec 2012 15:57:26 +0000
Subject: Simplify code, no functionality change.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171226 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5d495a68ed..4cdcdab9f1 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1733,7 +1733,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CCInfo.AnalyzeCallResult(Ins, RetCC_X86); // Copy all of the result registers out of their specified physreg. - for (unsigned i = 0; i != RVLocs.size(); ++i) { + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { CCValAssign &VA = RVLocs[i]; EVT CopyVT = VA.getValVT(); @@ -1990,10 +1990,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (VA.isExtInLoc()) { // Handle MMX values passed in XMM regs. - if (RegVT.isVector()) { - ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), - ArgValue); - } else + if (RegVT.isVector()) + ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue); + else ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); } } else { @@ -12405,18 +12404,16 @@ bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); - if (NumBits1 <= NumBits2) - return false; - return true; + return NumBits1 > NumBits2; } bool X86TargetLowering::isLegalICmpImmediate(int64_t Imm) const { - return Imm == (int32_t)Imm; + return isInt<32>(Imm); } bool X86TargetLowering::isLegalAddImmediate(int64_t Imm) const { // Can also use sub to handle negated immediates. - return Imm == (int32_t)Imm; + return isInt<32>(Imm); } bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { @@ -12424,9 +12421,7 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { return false; unsigned NumBits1 = VT1.getSizeInBits(); unsigned NumBits2 = VT2.getSizeInBits(); - if (NumBits1 <= NumBits2) - return false; - return true; + return NumBits1 > NumBits2; } bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { @@ -17553,13 +17548,13 @@ TargetLowering::ConstraintWeight case 'f': case 't': case 'u': - if (type->isFloatingPointTy()) - weight = CW_SpecificReg; - break; + if (type->isFloatingPointTy()) + weight = CW_SpecificReg; + break; case 'y': - if (type->isX86_MMXTy() && Subtarget->hasMMX()) - weight = CW_SpecificReg; - break; + if (type->isX86_MMXTy() && Subtarget->hasMMX()) + weight = CW_SpecificReg; + break; case 'x': case 'Y': if (((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1()) || -- cgit v1.2.3-18-g5258 From 6d183e400720b703dc9dbe6c8a28b615441601a2 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 29 Dec 2012 16:44:25 +0000 Subject: Remove intrinsic specific instructions for SSE/SSE2/AVX floating point max/min instructions. Lower them to target specific nodes and use those patterns instead. This also allows them to be commuted if UnsafeFPMath is enabled. 
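For example (illustrative IR), with this change a call such as

    %m = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a, <4 x float> %b)

is lowered in LowerINTRINSIC_WO_CHAIN to a generic X86ISD::FMAX node, so the
existing X86fmax patterns select MAXPS and the dedicated *_Int instruction
variants (MAXPSrr_Int and friends) can be removed from the fold tables below.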
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171227 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 29 ++++++++++++++++++++ lib/Target/X86/X86InstrInfo.cpp | 20 -------------- lib/Target/X86/X86InstrSSE.td | 54 -------------------------------------- 3 files changed, 29 insertions(+), 74 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4cdcdab9f1..48a7244141 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -10400,6 +10400,35 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(X86ISD::SMIN, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + // SSE/SSE2/AVX floating point max/min intrinsics. + case Intrinsic::x86_sse_max_ps: + case Intrinsic::x86_sse2_max_pd: + case Intrinsic::x86_avx_max_ps_256: + case Intrinsic::x86_avx_max_pd_256: + case Intrinsic::x86_sse_min_ps: + case Intrinsic::x86_sse2_min_pd: + case Intrinsic::x86_avx_min_ps_256: + case Intrinsic::x86_avx_min_pd_256: { + unsigned Opcode; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_sse_max_ps: + case Intrinsic::x86_sse2_max_pd: + case Intrinsic::x86_avx_max_ps_256: + case Intrinsic::x86_avx_max_pd_256: + Opcode = X86ISD::FMAX; + break; + case Intrinsic::x86_sse_min_ps: + case Intrinsic::x86_sse2_min_pd: + case Intrinsic::x86_avx_min_ps_256: + case Intrinsic::x86_avx_min_pd_256: + Opcode = X86ISD::FMIN; + break; + } + return DAG.getNode(Opcode, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } + // AVX2 variable shift intrinsics case Intrinsic::x86_avx2_psllv_d: case Intrinsic::x86_avx2_psllv_q: diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index bb81fa163f..f3ec067bdb 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -710,21 +710,13 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 }, - { X86::MAXPDrr_Int, X86::MAXPDrm_Int, TB_ALIGN_16 }, { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 }, - { X86::MAXPSrr_Int, X86::MAXPSrm_Int, TB_ALIGN_16 }, { X86::MAXSDrr, X86::MAXSDrm, 0 }, - { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 }, { X86::MAXSSrr, X86::MAXSSrm, 0 }, - { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 }, { X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 }, - { X86::MINPDrr_Int, X86::MINPDrm_Int, TB_ALIGN_16 }, { X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 }, - { X86::MINPSrr_Int, X86::MINPSrm_Int, TB_ALIGN_16 }, { X86::MINSDrr, X86::MINSDrm, 0 }, - { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 }, { X86::MINSSrr, X86::MINSSrm, 0 }, - { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 }, { X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 }, { X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 }, { X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 }, @@ -896,21 +888,13 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, 0 }, { X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, 0 }, { X86::VMAXPDrr, X86::VMAXPDrm, 0 }, - { X86::VMAXPDrr_Int, X86::VMAXPDrm_Int, 0 }, { X86::VMAXPSrr, X86::VMAXPSrm, 0 }, - { X86::VMAXPSrr_Int, X86::VMAXPSrm_Int, 0 }, { X86::VMAXSDrr, X86::VMAXSDrm, 0 }, - { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, 0 }, { X86::VMAXSSrr, X86::VMAXSSrm, 0 }, - { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, 0 }, { X86::VMINPDrr, X86::VMINPDrm, 0 }, - { 
X86::VMINPDrr_Int, X86::VMINPDrm_Int, 0 }, { X86::VMINPSrr, X86::VMINPSrm, 0 }, - { X86::VMINPSrr_Int, X86::VMINPSrm_Int, 0 }, { X86::VMINSDrr, X86::VMINSDrm, 0 }, - { X86::VMINSDrr_Int, X86::VMINSDrm_Int, 0 }, { X86::VMINSSrr, X86::VMINSSrm, 0 }, - { X86::VMINSSrr_Int, X86::VMINSSrm_Int, 0 }, { X86::VMPSADBWrri, X86::VMPSADBWrmi, 0 }, { X86::VMULPDrr, X86::VMULPDrm, 0 }, { X86::VMULPSrr, X86::VMULPSrm, 0 }, @@ -1037,13 +1021,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VHSUBPSYrr, X86::VHSUBPSYrm, 0 }, { X86::VINSERTF128rr, X86::VINSERTF128rm, 0 }, { X86::VMAXPDYrr, X86::VMAXPDYrm, 0 }, - { X86::VMAXPDYrr_Int, X86::VMAXPDYrm_Int, 0 }, { X86::VMAXPSYrr, X86::VMAXPSYrm, 0 }, - { X86::VMAXPSYrr_Int, X86::VMAXPSYrm_Int, 0 }, { X86::VMINPDYrr, X86::VMINPDYrm, 0 }, - { X86::VMINPDYrr_Int, X86::VMINPDYrm_Int, 0 }, { X86::VMINPSYrr, X86::VMINPSYrm, 0 }, - { X86::VMINPSYrr_Int, X86::VMINPSYrm_Int, 0 }, { X86::VMULPDYrr, X86::VMULPDYrm, 0 }, { X86::VMULPSYrr, X86::VMULPSYrm, 0 }, { X86::VORPDYrr, X86::VORPDYrm, 0 }, diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 0c7d23f143..041b4ceec0 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -217,27 +217,6 @@ multiclass sse12_fp_packed_logical_rm opc, RegisterClass RC, Domain d, pat_rm, IIC_DEFAULT, d>; } -/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class -multiclass sse12_fp_packed_int opc, string OpcodeStr, RegisterClass RC, - string asm, string SSEVer, string FPSizeStr, - X86MemOperand x86memop, PatFrag mem_frag, - Domain d, OpndItins itins, bit Is2Addr = 1> { - def rr_Int : PI( - !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, RC:$src2))], IIC_DEFAULT, d>; - def rm_Int : PI( - !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, (mem_frag addr:$src2)))], IIC_DEFAULT, d>; -} - //===----------------------------------------------------------------------===// // Non-instruction patterns //===----------------------------------------------------------------------===// @@ -2864,37 +2843,6 @@ multiclass basic_sse12_fp_binop_s_int opc, string OpcodeStr, itins.d, Is2Addr>, XD; } -multiclass basic_sse12_fp_binop_p_int opc, string OpcodeStr, - SizeItins itins> { -let Predicates = [HasAVX] in { - defm V#NAME#PS : sse12_fp_packed_int, TB, VEX_4V; - - defm V#NAME#PD : sse12_fp_packed_int, TB, OpSize, VEX_4V; - - defm V#NAME#PSY : sse12_fp_packed_int, TB, VEX_4V, VEX_L; - - defm V#NAME#PDY : sse12_fp_packed_int, TB, OpSize, VEX_4V, VEX_L; -} - -let Constraints = "$src1 = $dst" in { - defm PS : sse12_fp_packed_int, TB; - - defm PD : sse12_fp_packed_int, TB, OpSize; -} -} - // Binary Arithmetic instructions defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>; defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>; @@ -2902,9 +2850,7 @@ let isCommutable = 0 in { defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>; defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>; defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>; - defm MAX : basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P>; defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>; - defm MIN : basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P>; } let isCodeGenOnly = 1 in { -- cgit v1.2.3-18-g5258 From 6f57f39e4ac767c84b25973a2f517fde3d37a8dd Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 29 Dec 2012 17:19:06 +0000 Subject: Merge similar 
functionality using a nested switch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171229 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 47 +++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 8 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 48a7244141..2b48d50f92 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -10373,32 +10373,63 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { case Intrinsic::x86_avx2_pmaxu_b: case Intrinsic::x86_avx2_pmaxu_w: case Intrinsic::x86_avx2_pmaxu_d: - return DAG.getNode(X86ISD::UMAX, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_sse2_pminu_b: case Intrinsic::x86_sse41_pminuw: case Intrinsic::x86_sse41_pminud: case Intrinsic::x86_avx2_pminu_b: case Intrinsic::x86_avx2_pminu_w: case Intrinsic::x86_avx2_pminu_d: - return DAG.getNode(X86ISD::UMIN, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_sse41_pmaxsb: case Intrinsic::x86_sse2_pmaxs_w: case Intrinsic::x86_sse41_pmaxsd: case Intrinsic::x86_avx2_pmaxs_b: case Intrinsic::x86_avx2_pmaxs_w: case Intrinsic::x86_avx2_pmaxs_d: - return DAG.getNode(X86ISD::SMAX, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_sse41_pminsb: case Intrinsic::x86_sse2_pmins_w: case Intrinsic::x86_sse41_pminsd: case Intrinsic::x86_avx2_pmins_b: case Intrinsic::x86_avx2_pmins_w: - case Intrinsic::x86_avx2_pmins_d: - return DAG.getNode(X86ISD::SMIN, dl, Op.getValueType(), + case Intrinsic::x86_avx2_pmins_d: { + unsigned Opcode; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_sse2_pmaxu_b: + case Intrinsic::x86_sse41_pmaxuw: + case Intrinsic::x86_sse41_pmaxud: + case Intrinsic::x86_avx2_pmaxu_b: + case Intrinsic::x86_avx2_pmaxu_w: + case Intrinsic::x86_avx2_pmaxu_d: + Opcode = X86ISD::UMAX; + break; + case Intrinsic::x86_sse2_pminu_b: + case Intrinsic::x86_sse41_pminuw: + case Intrinsic::x86_sse41_pminud: + case Intrinsic::x86_avx2_pminu_b: + case Intrinsic::x86_avx2_pminu_w: + case Intrinsic::x86_avx2_pminu_d: + Opcode = X86ISD::UMIN; + break; + case Intrinsic::x86_sse41_pmaxsb: + case Intrinsic::x86_sse2_pmaxs_w: + case Intrinsic::x86_sse41_pmaxsd: + case Intrinsic::x86_avx2_pmaxs_b: + case Intrinsic::x86_avx2_pmaxs_w: + case Intrinsic::x86_avx2_pmaxs_d: + Opcode = X86ISD::SMAX; + break; + case Intrinsic::x86_sse41_pminsb: + case Intrinsic::x86_sse2_pmins_w: + case Intrinsic::x86_sse41_pminsd: + case Intrinsic::x86_avx2_pmins_b: + case Intrinsic::x86_avx2_pmins_w: + case Intrinsic::x86_avx2_pmins_d: + Opcode = X86ISD::SMIN; + break; + } + return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + } // SSE/SSE2/AVX floating point max/min intrinsics. case Intrinsic::x86_sse_max_ps: -- cgit v1.2.3-18-g5258 From 22d8f0d68519240b0936983322cfdb9c84a4ed0c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 29 Dec 2012 18:18:20 +0000 Subject: Remove intrinsic specific instructions for (V)SQRTPS/PD. Instead lower to target-independent ISD nodes and use the existing patterns for those. 
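For example (illustrative IR), after this change

    %s = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a)

is expanded in LowerINTRINSIC_WO_CHAIN to DAG.getNode(ISD::FSQRT, dl,
Op.getValueType(), Op.getOperand(1)); the ordinary fsqrt patterns then select
SQRTPS, which is what makes the SQRTPSr_Int/SQRTPSm_Int forms removable.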
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171237 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 6 ++++++ lib/Target/X86/X86InstrInfo.cpp | 15 --------------- lib/Target/X86/X86InstrSSE.td | 12 +----------- 3 files changed, 7 insertions(+), 26 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2b48d50f92..d279d2da52 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -10527,6 +10527,12 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(1)); + case Intrinsic::x86_sse_sqrt_ps: + case Intrinsic::x86_sse2_sqrt_pd: + case Intrinsic::x86_avx_sqrt_ps_256: + case Intrinsic::x86_avx_sqrt_pd_256: + return DAG.getNode(ISD::FSQRT, dl, Op.getValueType(), Op.getOperand(1)); + // ptest and testp intrinsics. The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest // or testp pattern and a setcc for the result. diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index f3ec067bdb..a7424096a8 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -467,9 +467,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::RSQRTSSr, X86::RSQRTSSm, 0 }, { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 }, { X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 }, - { X86::SQRTPDr_Int, X86::SQRTPDm_Int, TB_ALIGN_16 }, { X86::SQRTPSr, X86::SQRTPSm, TB_ALIGN_16 }, - { X86::SQRTPSr_Int, X86::SQRTPSm_Int, TB_ALIGN_16 }, { X86::SQRTSDr, X86::SQRTSDm, 0 }, { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 }, { X86::SQRTSSr, X86::SQRTSSm, 0 }, @@ -528,9 +526,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VRSQRTPSr, X86::VRSQRTPSm, 0 }, { X86::VRSQRTPSr_Int, X86::VRSQRTPSm_Int, 0 }, { X86::VSQRTPDr, X86::VSQRTPDm, 0 }, - { X86::VSQRTPDr_Int, X86::VSQRTPDm_Int, 0 }, { X86::VSQRTPSr, X86::VSQRTPSm, 0 }, - { X86::VSQRTPSr_Int, X86::VSQRTPSm_Int, 0 }, { X86::VUCOMISDrr, X86::VUCOMISDrm, 0 }, { X86::VUCOMISSrr, X86::VUCOMISSrm, 0 }, { X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE }, @@ -554,11 +550,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VRCPPSYr, X86::VRCPPSYm, 0 }, { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, 0 }, { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 }, - { X86::VRSQRTPSYr_Int, X86::VRSQRTPSYm_Int, 0 }, { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 }, - { X86::VSQRTPDYr_Int, X86::VSQRTPDYm_Int, 0 }, { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 }, - { X86::VSQRTPSYr_Int, X86::VSQRTPSYm_Int, 0 }, { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE }, { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE }, @@ -4670,13 +4663,9 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const { case X86::DIVSSrr: case X86::DIVSSrr_Int: case X86::SQRTPDm: - case X86::SQRTPDm_Int: case X86::SQRTPDr: - case X86::SQRTPDr_Int: case X86::SQRTPSm: - case X86::SQRTPSm_Int: case X86::SQRTPSr: - case X86::SQRTPSr_Int: case X86::SQRTSDm: case X86::SQRTSDm_Int: case X86::SQRTSDr: @@ -4695,13 +4684,9 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const { case X86::VDIVSSrr: case X86::VDIVSSrr_Int: case X86::VSQRTPDm: - case X86::VSQRTPDm_Int: case X86::VSQRTPDr: - case X86::VSQRTPDr_Int: case X86::VSQRTPSm: - case X86::VSQRTPSm_Int: case X86::VSQRTPSr: - case X86::VSQRTPSr_Int: case X86::VSQRTSDm: case X86::VSQRTSDm_Int: case X86::VSQRTSDr: diff 
--git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 041b4ceec0..120202fef8 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3116,14 +3116,6 @@ let Predicates = [HasAVX] in { sse2_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>, sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>, sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>, - sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps, - SSE_SQRTP>, - sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd, - SSE_SQRTP>, - sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256, - SSE_SQRTP>, - sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256, - SSE_SQRTP>, VEX; // Reciprocal approximations. Note that these typically require refinement @@ -3202,11 +3194,9 @@ let Predicates = [HasAVX] in { defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss, SSE_SQRTS>, sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>, - sse1_fp_unop_p_int<0x51, "sqrt", int_x86_sse_sqrt_ps, SSE_SQRTS>, sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd, SSE_SQRTS>, - sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>, - sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd, SSE_SQRTS>; + sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>; /// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. multiclass sse1_fp_unop_rw opc, string OpcodeStr, SDNode OpNode, -- cgit v1.2.3-18-g5258 From 831737d329a727f53a1fb0572f7b7a8127208881 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 30 Dec 2012 10:32:01 +0000 Subject: Remove the Function::getFnAttributes method in favor of using the AttributeSet directly. This is in preparation for removing the use of the 'Attribute' class as a collection of attributes. That will shift to the AttributeSet class instead. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171253 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 3 ++- lib/Target/X86/X86ISelDAGToDAG.cpp | 4 ++-- lib/Target/X86/X86ISelLowering.cpp | 25 ++++++++++++++----------- lib/Target/X86/X86InstrInfo.cpp | 8 ++++---- lib/Target/X86/X86RegisterInfo.cpp | 3 ++- 5 files changed, 24 insertions(+), 19 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 6345487751..f70be1d2c1 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -691,7 +691,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // pointer, calls, or dynamic alloca then we do not need to adjust the // stack pointer (we fit in the Red Zone). We also check that we don't // push and pop from the stack. - if (Is64Bit && !Fn->getFnAttributes().hasAttribute(Attribute::NoRedZone) && + if (Is64Bit && !Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoRedZone) && !RegInfo->needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index acb89c11fc..c9918b900d 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -431,8 +431,8 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { void X86DAGToDAGISel::PreprocessISelDAG() { // OptForSize is used in pattern predicates that isel is matching. - OptForSize = MF->getFunction()->getFnAttributes(). 
- hasAttribute(Attribute::OptimizeForSize); + OptForSize = MF->getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E; ) { diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index d279d2da52..0900603092 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1389,7 +1389,8 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, MachineFunction &MF) const { const Function *F = MF.getFunction(); if ((!IsMemset || ZeroMemset) && - !F->getFnAttributes().hasAttribute(Attribute::NoImplicitFloat)) { + !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoImplicitFloat)) { if (Size >= 16 && (Subtarget->isUnalignedMemAccessFast() || ((DstAlign == 0 || DstAlign >= 16) && @@ -2068,8 +2069,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, TotalNumIntRegs); - bool NoImplicitFloatOps = Fn->getFnAttributes(). - hasAttribute(Attribute::NoImplicitFloat); + bool NoImplicitFloatOps = Fn->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat); assert(!(NumXMMRegs && !Subtarget->hasSSE1()) && "SSE register cannot be used when SSE is disabled!"); assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat && @@ -2547,8 +2548,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, OpFlags = X86II::MO_DARWIN_STUB; } else if (Subtarget->isPICStyleRIPRel() && isa(GV) && - cast(GV)->getFnAttributes(). - hasAttribute(Attribute::NonLazyBind)) { + cast(GV)->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, + Attribute::NonLazyBind)) { // If the function is marked as non-lazy, generate an indirect call // which loads from the GOT directly. This avoids runtime overhead // at the cost of eager binding (and one extra byte of encoding). @@ -6734,8 +6736,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { bool HasFp256 = Subtarget->hasFp256(); bool HasInt256 = Subtarget->hasInt256(); MachineFunction &MF = DAG.getMachineFunction(); - bool OptForSize = MF.getFunction()->getFnAttributes(). - hasAttribute(Attribute::OptimizeForSize); + bool OptForSize = MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles"); @@ -10103,8 +10105,9 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { // Sanity Check: Make sure using fp_offset makes sense. assert(!getTargetMachine().Options.UseSoftFloat && !(DAG.getMachineFunction() - .getFunction()->getFnAttributes() - .hasAttribute(Attribute::NoImplicitFloat)) && + .getFunction()->getAttributes() + .hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoImplicitFloat)) && Subtarget->hasSSE1()); } @@ -16574,8 +16577,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, return SDValue(); const Function *F = DAG.getMachineFunction().getFunction(); - bool NoImplicitFloatOps = F->getFnAttributes(). - hasAttribute(Attribute::NoImplicitFloat); + bool NoImplicitFloatOps = F->getAttributes(). 
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat); bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps && Subtarget->hasSSE2(); if ((VT.isVector() || diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index a7424096a8..16c6b3d73c 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -3864,8 +3864,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Unless optimizing for size, don't fold to avoid partial // register update stalls - if (!MF.getFunction()->getFnAttributes(). - hasAttribute(Attribute::OptimizeForSize) && + if (!MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) && hasPartialRegUpdate(MI->getOpcode())) return 0; @@ -3906,8 +3906,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Unless optimizing for size, don't fold to avoid partial // register update stalls - if (!MF.getFunction()->getFnAttributes(). - hasAttribute(Attribute::OptimizeForSize) && + if (!MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) && hasPartialRegUpdate(MI->getOpcode())) return 0; diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 6b36101a12..07c0d148aa 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -419,7 +419,8 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || - F->getFnAttributes().hasAttribute(Attribute::StackAlignment)); + F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackAlignment)); // If we've requested that we force align the stack do so now. if (ForceStackAlign) -- cgit v1.2.3-18-g5258 From 8b62abdd7b9c8fc5d78dad86093f4afdfeba949d Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 30 Dec 2012 13:01:51 +0000 Subject: Remove the Function::getRetAttributes method in favor of using the AttributeSet accessor method. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171256 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 313bf2232c..9d2dac2c9b 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -740,8 +740,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { if (Ret->getNumOperands() > 0) { SmallVector Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), - Outs, TLI); + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); // Analyze operands of the call, assigning locations to each operand. SmallVector ValLocs; @@ -1585,8 +1584,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { // Check whether the function can return without sret-demotion. 
SmallVector Outs; - GetReturnInfo(I->getType(), CS.getAttributes().getRetAttributes(), - Outs, TLI); + GetReturnInfo(I->getType(), CS.getAttributes(), Outs, TLI); bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), *FuncInfo.MF, FTy->isVarArg(), Outs, FTy->getContext()); -- cgit v1.2.3-18-g5258 From 94e94b350652d3a71993bbc7d44afbe3b304605e Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 30 Dec 2012 13:50:49 +0000 Subject: Use the predicate methods off of AttributeSet instead of Attribute. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171257 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0900603092..401b9b0346 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -11056,7 +11056,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, for (FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); I != E; ++I, ++Idx) - if (Attrs.getParamAttributes(Idx).hasAttribute(Attribute::InReg)) + if (Attrs.hasAttribute(Idx, Attribute::InReg)) // FIXME: should only count parameters that are lowered to integers. InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32; -- cgit v1.2.3-18-g5258 From dd9ccdb05036748513ec3c8c73e86ffa00a2e6cf Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 31 Dec 2012 23:49:05 +0000 Subject: Use packed instead of scalar itineraries for SSE1/2 SQRTPS/PD, RCPPS, and RSQRTPS. VEX-encoded forms already use packed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171336 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 120202fef8..9082c5a6ea 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3193,10 +3193,10 @@ let Predicates = [HasAVX] in { // Square root. defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss, SSE_SQRTS>, - sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>, + sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>, sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd, SSE_SQRTS>, - sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>; + sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>; /// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. multiclass sse1_fp_unop_rw opc, string OpcodeStr, SDNode OpNode, @@ -3229,9 +3229,9 @@ multiclass sse1_fp_unop_rw opc, string OpcodeStr, SDNode OpNode, // in order to obtain suitable precision. 
defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss, SSE_SQRTS>, - sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>, + sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>, sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps, - SSE_SQRTS>; + SSE_SQRTP>; let Predicates = [UseSSE1] in { def : Pat<(int_x86_sse_rsqrt_ss VR128:$src), (RSQRTSSr_Int VR128:$src, VR128:$src)>; @@ -3239,8 +3239,8 @@ let Predicates = [UseSSE1] in { defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss, SSE_RCPS>, - sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPS>, - sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, SSE_RCPS>; + sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>, + sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, SSE_RCPP>; let Predicates = [UseSSE1] in { def : Pat<(int_x86_sse_rcp_ss VR128:$src), (RCPSSr_Int VR128:$src, VR128:$src)>; -- cgit v1.2.3-18-g5258 From 5284f976327d789b9e52089ff684fb36f6d964dd Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 1 Jan 2013 00:11:07 +0000 Subject: Merge AVX/SSE instruction definitions for SQRTPS/PD, RSQRTPS, RCPPS. No funcitonal change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171337 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 105 +++++++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 47 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 9082c5a6ea..6bcbc39298 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2974,27 +2974,37 @@ multiclass sse1_fp_unop_s_avx opc, string OpcodeStr> { /// sse1_fp_unop_p - SSE1 unops in packed form. multiclass sse1_fp_unop_p opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { +let Predicates = [HasAVX] in { + def V#NAME#PSr : PSI, VEX; + def V#NAME#PSm : PSI, VEX; + def V#NAME#PSYr : PSI, VEX, VEX_L; + def V#NAME#PSYm : PSI, VEX, VEX_L; +} + def PSr : PSI; + !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>; def PSm : PSI; } -/// sse1_fp_unop_p_y - AVX 256-bit SSE1 unops in packed form. -multiclass sse1_fp_unop_p_y opc, string OpcodeStr, SDNode OpNode, - OpndItins itins> { - def PSYr : PSI, VEX_L; - def PSYm : PSI, VEX_L; -} - /// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms. multiclass sse1_fp_unop_p_int opc, string OpcodeStr, Intrinsic V4F32Int, OpndItins itins> { @@ -3045,7 +3055,7 @@ let hasSideEffects = 0 in multiclass sse2_fp_unop_s_avx opc, string OpcodeStr> { def SDr : SDI; + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; let mayLoad = 1 in { def SDm : SDI opc, string OpcodeStr> { } } -/// sse2_fp_unop_p - SSE2 unops in vector forms. +/// sse2_fp_unop_p_new - SSE2 unops in vector forms. multiclass sse2_fp_unop_p opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { +let Predicates = [HasAVX] in { + def V#NAME#PDr : PDI, VEX; + def V#NAME#PDm : PDI, VEX; + def V#NAME#PDYr : PDI, VEX, VEX_L; + def V#NAME#PDYm : PDI, VEX, VEX_L; +} + def PDr : PDI; @@ -3068,19 +3101,6 @@ multiclass sse2_fp_unop_p opc, string OpcodeStr, [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>; } -/// sse2_fp_unop_p_y - AVX SSE2 256-bit unops in vector forms. -multiclass sse2_fp_unop_p_y opc, string OpcodeStr, SDNode OpNode, - OpndItins itins> { - def PDYr : PDI, VEX_L; - def PDYm : PDI, VEX_L; -} - /// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms. 
multiclass sse2_fp_unop_p_int opc, string OpcodeStr, Intrinsic V2F64Int, OpndItins itins> { @@ -3107,31 +3127,26 @@ multiclass sse2_fp_unop_p_y_int opc, string OpcodeStr, itins.rm>, VEX_L; } +defm SQRT : sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>, + sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>; +defm RSQRT : sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>; +defm RCP : sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>; + let Predicates = [HasAVX] in { // Square root. defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt">, sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG; - defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>, - sse2_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>, - sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>, - sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>, - VEX; - // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG; - defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt, SSE_SQRTP>, - sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt, SSE_SQRTP>, - sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256, + defm VRSQRT : sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256, SSE_SQRTP>, sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps, SSE_SQRTP>, VEX; defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG; - defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp, SSE_RCPP>, - sse1_fp_unop_p_y<0x53, "vrcp", X86frcp, SSE_RCPP>, - sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256, + defm VRCP : sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256, SSE_RCPP>, sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps, SSE_RCPP>, VEX; @@ -3193,10 +3208,8 @@ let Predicates = [HasAVX] in { // Square root. defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss, SSE_SQRTS>, - sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>, sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd, - SSE_SQRTS>, - sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>; + SSE_SQRTS>; /// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. multiclass sse1_fp_unop_rw opc, string OpcodeStr, SDNode OpNode, @@ -3229,7 +3242,6 @@ multiclass sse1_fp_unop_rw opc, string OpcodeStr, SDNode OpNode, // in order to obtain suitable precision. defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss, SSE_SQRTS>, - sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>, sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps, SSE_SQRTP>; let Predicates = [UseSSE1] in { @@ -3239,7 +3251,6 @@ let Predicates = [UseSSE1] in { defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss, SSE_RCPS>, - sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>, sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, SSE_RCPP>; let Predicates = [UseSSE1] in { def : Pat<(int_x86_sse_rcp_ss VR128:$src), -- cgit v1.2.3-18-g5258 From 42ab0d75aba8cbd466a0d7769c047ab210a48753 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 1 Jan 2013 02:02:45 +0000 Subject: Remove 2 unused multiclasses. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171338 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 6bcbc39298..91ef15b107 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3101,32 +3101,6 @@ let Predicates = [HasAVX] in { [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>; } -/// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms. -multiclass sse2_fp_unop_p_int opc, string OpcodeStr, - Intrinsic V2F64Int, OpndItins itins> { - def PDr_Int : PDI; - def PDm_Int : PDI; -} - -/// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms. -multiclass sse2_fp_unop_p_y_int opc, string OpcodeStr, - Intrinsic V2F64Int, OpndItins itins> { - def PDYr_Int : PDI, VEX_L; - def PDYm_Int : PDI, VEX_L; -} - defm SQRT : sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>, sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>; defm RSQRT : sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>; -- cgit v1.2.3-18-g5258 From 76f94fd3619f2460c5ee64383e2741ac34773e36 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 1 Jan 2013 03:30:21 +0000 Subject: Merge intrinsic instruction definitions for SSE and AVX versions of RCPPS and RSQRTPS. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171339 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 64 +++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 30 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 91ef15b107..b2caf13ca8 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3007,7 +3007,32 @@ let Predicates = [HasAVX] in { /// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms. multiclass sse1_fp_unop_p_int opc, string OpcodeStr, - Intrinsic V4F32Int, OpndItins itins> { + Intrinsic V4F32Int, Intrinsic V8F32Int, + OpndItins itins> { +let Predicates = [HasAVX] in { + def V#NAME#PSr_Int : PSI, VEX; + def V#NAME#PSm_Int : PSI, VEX; + def V#NAME#PSYr_Int : PSI, VEX, VEX_L; + def V#NAME#PSYm_Int : PSI, VEX, VEX_L; +} + def PSr_Int : PSI opc, string OpcodeStr, itins.rm>; } -/// sse1_fp_unop_p_y_int - AVX 256-bit intrinsics unops in packed forms. -multiclass sse1_fp_unop_p_y_int opc, string OpcodeStr, - Intrinsic V4F32Int, OpndItins itins> { - def PSYr_Int : PSI, VEX_L; - def PSYm_Int : PSI, VEX_L; -} - /// sse2_fp_unop_s - SSE2 unops in scalar form. multiclass sse2_fp_unop_s opc, string OpcodeStr, SDNode OpNode, Intrinsic F64Int, OpndItins itins> { @@ -3103,8 +3115,12 @@ let Predicates = [HasAVX] in { defm SQRT : sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>, sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>; -defm RSQRT : sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>; -defm RCP : sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>; +defm RSQRT : sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>, + sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps, + int_x86_avx_rsqrt_ps_256, SSE_SQRTP>; +defm RCP : sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>, + sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, + int_x86_avx_rcp_ps_256, SSE_RCPP>; let Predicates = [HasAVX] in { // Square root. @@ -3114,16 +3130,7 @@ let Predicates = [HasAVX] in { // Reciprocal approximations. 
Note that these typically require refinement // in order to obtain suitable precision. defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG; - defm VRSQRT : sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256, - SSE_SQRTP>, - sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps, - SSE_SQRTP>, VEX; - defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG; - defm VRCP : sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256, - SSE_RCPP>, - sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps, - SSE_RCPP>, VEX; } def : Pat<(f32 (fsqrt FR32:$src)), @@ -3215,17 +3222,14 @@ multiclass sse1_fp_unop_rw opc, string OpcodeStr, SDNode OpNode, // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss, - SSE_SQRTS>, - sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps, - SSE_SQRTP>; + SSE_SQRTS>; let Predicates = [UseSSE1] in { def : Pat<(int_x86_sse_rsqrt_ss VR128:$src), (RSQRTSSr_Int VR128:$src, VR128:$src)>; } defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss, - SSE_RCPS>, - sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, SSE_RCPP>; + SSE_RCPS>; let Predicates = [UseSSE1] in { def : Pat<(int_x86_sse_rcp_ss VR128:$src), (RCPSSr_Int VR128:$src, VR128:$src)>; -- cgit v1.2.3-18-g5258 From 117e4d2e190fe2e6427e5ec0b633048d9a744018 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 1 Jan 2013 03:42:44 +0000 Subject: Remove unused argument from a multiclass. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171340 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b2caf13ca8..a53ad61af8 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3194,7 +3194,7 @@ defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss, /// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. multiclass sse1_fp_unop_rw opc, string OpcodeStr, SDNode OpNode, - Intrinsic F32Int, OpndItins itins> { + OpndItins itins> { def SSr : SSI; @@ -3221,15 +3221,13 @@ multiclass sse1_fp_unop_rw opc, string OpcodeStr, SDNode OpNode, // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. -defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss, - SSE_SQRTS>; +defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>; let Predicates = [UseSSE1] in { def : Pat<(int_x86_sse_rsqrt_ss VR128:$src), (RSQRTSSr_Int VR128:$src, VR128:$src)>; } -defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss, - SSE_RCPS>; +defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>; let Predicates = [UseSSE1] in { def : Pat<(int_x86_sse_rcp_ss VR128:$src), (RCPSSr_Int VR128:$src, VR128:$src)>; -- cgit v1.2.3-18-g5258 From b511048cd0ef3fcad0cef7c004ea1f2f5c28003b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 1 Jan 2013 20:53:20 +0000 Subject: Merge SSE and AVX instruction definitions for scalar forms of SQRT, RSQRT, and RCP. 
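RSQRTSS and RCPSS compute hardware approximations; as the comments in
these definitions note, the results typically require refinement in
order to obtain suitable precision. A minimal C sketch of that
refinement, assuming the SSE1 rsqrtss intrinsic and one Newton-Raphson
step (the helper name is illustrative, not part of this patch):

#include <xmmintrin.h>

/* RSQRTSS guarantees only about 12 bits of precision; one
   Newton-Raphson step, r1 = r0 * (1.5 - 0.5 * x * r0 * r0), recovers
   close to full float accuracy. */
static float rsqrt_refined(float x) {
  float r = _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(x)));
  return r * (1.5f - 0.5f * x * r * r);
}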
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171351 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 179 +++++++++++++++++++++++------------------- 1 file changed, 97 insertions(+), 82 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index a53ad61af8..dc1ffecb23 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2936,6 +2936,26 @@ def SSE_RCPS : OpndItins< /// sse1_fp_unop_s - SSE1 unops in scalar form. multiclass sse1_fp_unop_s opc, string OpcodeStr, SDNode OpNode, Intrinsic F32Int, OpndItins itins> { +let Predicates = [HasAVX], hasSideEffects = 0 in { + def V#NAME#SSr : SSI, VEX_4V, VEX_LIG; + let mayLoad = 1 in { + def V#NAME#SSm : SSI, VEX_4V, VEX_LIG; + def V#NAME#SSm_Int : SSI, VEX_4V, VEX_LIG; + } +} + def SSr : SSI; @@ -2955,19 +2975,50 @@ multiclass sse1_fp_unop_s opc, string OpcodeStr, [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>; } -/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form. -multiclass sse1_fp_unop_s_avx opc, string OpcodeStr> { - def SSr : SSI; +/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. +multiclass sse1_fp_unop_rw opc, string OpcodeStr, SDNode OpNode, + OpndItins itins> { +let Predicates = [HasAVX], hasSideEffects = 0 in { + def V#NAME#SSr : SSI, VEX_4V, VEX_LIG; let mayLoad = 1 in { - def SSm : SSI; - def SSm_Int : SSI; + def V#NAME#SSm : SSI, VEX_4V, VEX_LIG; + def V#NAME#SSm_Int : SSI, VEX_4V, VEX_LIG; + } +} + + def SSr : SSI; + // For scalar unary operations, fold a load into the operation + // only in OptForSize mode. It eliminates an instruction, but it also + // eliminates a whole-register clobber (the load), so it introduces a + // partial register update condition. + def SSm : I, XS, + Requires<[UseSSE1, OptForSize]>; + let Constraints = "$src1 = $dst" in { + def SSr_Int : SSI; + let mayLoad = 1, hasSideEffects = 0 in + def SSm_Int : SSI; } } @@ -3046,6 +3097,26 @@ let Predicates = [HasAVX] in { /// sse2_fp_unop_s - SSE2 unops in scalar form. multiclass sse2_fp_unop_s opc, string OpcodeStr, SDNode OpNode, Intrinsic F64Int, OpndItins itins> { +let Predicates = [HasAVX], hasSideEffects = 0 in { + def V#NAME#SDr : SDI, VEX_4V, VEX_LIG; + let mayLoad = 1 in { + def V#NAME#SDm : SDI, VEX_4V, VEX_LIG; + def V#NAME#SDm_Int : SDI, VEX_4V, VEX_LIG; + } +} + def SDr : SDI; @@ -3062,24 +3133,7 @@ multiclass sse2_fp_unop_s opc, string OpcodeStr, [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>; } -/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form. -let hasSideEffects = 0 in -multiclass sse2_fp_unop_s_avx opc, string OpcodeStr> { - def SDr : SDI; - let mayLoad = 1 in { - def SDm : SDI; - def SDm_Int : SDI; - } -} - -/// sse2_fp_unop_p_new - SSE2 unops in vector forms. +/// sse2_fp_unop_p - SSE2 unops in vector forms. multiclass sse2_fp_unop_p opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { let Predicates = [HasAVX] in { @@ -3113,26 +3167,25 @@ let Predicates = [HasAVX] in { [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>; } -defm SQRT : sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>, +// Square root. 
+defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss, + SSE_SQRTS>, + sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>, + sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd, + SSE_SQRTS>, sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>; -defm RSQRT : sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>, + +// Reciprocal approximations. Note that these typically require refinement +// in order to obtain suitable precision. +defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>, + sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>, sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps, int_x86_avx_rsqrt_ps_256, SSE_SQRTP>; -defm RCP : sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>, +defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>, + sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>, sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, int_x86_avx_rcp_ps_256, SSE_RCPP>; -let Predicates = [HasAVX] in { - // Square root. - defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt">, - sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG; - - // Reciprocal approximations. Note that these typically require refinement - // in order to obtain suitable precision. - defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG; - defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG; -} - def : Pat<(f32 (fsqrt FR32:$src)), (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>; def : Pat<(f32 (fsqrt (load addr:$src))), @@ -3186,49 +3239,11 @@ let Predicates = [HasAVX] in { (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; } -// Square root. -defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss, - SSE_SQRTS>, - sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd, - SSE_SQRTS>; - -/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. -multiclass sse1_fp_unop_rw opc, string OpcodeStr, SDNode OpNode, - OpndItins itins> { - def SSr : SSI; - // For scalar unary operations, fold a load into the operation - // only in OptForSize mode. It eliminates an instruction, but it also - // eliminates a whole-register clobber (the load), so it introduces a - // partial register update condition. - def SSm : I, XS, - Requires<[UseSSE1, OptForSize]>; - let Constraints = "$src1 = $dst" in { - def SSr_Int : SSI; - let mayLoad = 1, hasSideEffects = 0 in - def SSm_Int : SSI; - } -} - // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. -defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>; let Predicates = [UseSSE1] in { def : Pat<(int_x86_sse_rsqrt_ss VR128:$src), (RSQRTSSr_Int VR128:$src, VR128:$src)>; -} - -defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>; -let Predicates = [UseSSE1] in { def : Pat<(int_x86_sse_rcp_ss VR128:$src), (RCPSSr_Int VR128:$src, VR128:$src)>; } -- cgit v1.2.3-18-g5258 From 9478673ce0e9ea357ee0dd7baf4837c319828a94 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 2 Jan 2013 01:35:11 +0000 Subject: Revert 171351. It broke MC/X86/x86-32-avx.s. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171352 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 179 +++++++++++++++++++----------------------- 1 file changed, 82 insertions(+), 97 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index dc1ffecb23..a53ad61af8 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2936,26 +2936,6 @@ def SSE_RCPS : OpndItins< /// sse1_fp_unop_s - SSE1 unops in scalar form. multiclass sse1_fp_unop_s opc, string OpcodeStr, SDNode OpNode, Intrinsic F32Int, OpndItins itins> { -let Predicates = [HasAVX], hasSideEffects = 0 in { - def V#NAME#SSr : SSI, VEX_4V, VEX_LIG; - let mayLoad = 1 in { - def V#NAME#SSm : SSI, VEX_4V, VEX_LIG; - def V#NAME#SSm_Int : SSI, VEX_4V, VEX_LIG; - } -} - def SSr : SSI; @@ -2975,50 +2955,19 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>; } -/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. -multiclass sse1_fp_unop_rw opc, string OpcodeStr, SDNode OpNode, - OpndItins itins> { -let Predicates = [HasAVX], hasSideEffects = 0 in { - def V#NAME#SSr : SSI, VEX_4V, VEX_LIG; +/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form. +multiclass sse1_fp_unop_s_avx opc, string OpcodeStr> { + def SSr : SSI; let mayLoad = 1 in { - def V#NAME#SSm : SSI, VEX_4V, VEX_LIG; - def V#NAME#SSm_Int : SSI, VEX_4V, VEX_LIG; - } -} - - def SSr : SSI; - // For scalar unary operations, fold a load into the operation - // only in OptForSize mode. It eliminates an instruction, but it also - // eliminates a whole-register clobber (the load), so it introduces a - // partial register update condition. - def SSm : I, XS, - Requires<[UseSSE1, OptForSize]>; - let Constraints = "$src1 = $dst" in { - def SSr_Int : SSI; - let mayLoad = 1, hasSideEffects = 0 in - def SSm_Int : SSI; + def SSm : SSI; + def SSm_Int : SSI; } } @@ -3097,26 +3046,6 @@ let Predicates = [HasAVX] in { /// sse2_fp_unop_s - SSE2 unops in scalar form. multiclass sse2_fp_unop_s opc, string OpcodeStr, SDNode OpNode, Intrinsic F64Int, OpndItins itins> { -let Predicates = [HasAVX], hasSideEffects = 0 in { - def V#NAME#SDr : SDI, VEX_4V, VEX_LIG; - let mayLoad = 1 in { - def V#NAME#SDm : SDI, VEX_4V, VEX_LIG; - def V#NAME#SDm_Int : SDI, VEX_4V, VEX_LIG; - } -} - def SDr : SDI; @@ -3133,7 +3062,24 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>; } -/// sse2_fp_unop_p - SSE2 unops in vector forms. +/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form. +let hasSideEffects = 0 in +multiclass sse2_fp_unop_s_avx opc, string OpcodeStr> { + def SDr : SDI; + let mayLoad = 1 in { + def SDm : SDI; + def SDm_Int : SDI; + } +} + +/// sse2_fp_unop_p_new - SSE2 unops in vector forms. multiclass sse2_fp_unop_p opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { let Predicates = [HasAVX] in { @@ -3167,25 +3113,26 @@ let Predicates = [HasAVX] in { [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>; } -// Square root. -defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss, - SSE_SQRTS>, - sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>, - sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd, - SSE_SQRTS>, +defm SQRT : sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>, sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>; - -// Reciprocal approximations. 
Note that these typically require refinement -// in order to obtain suitable precision. -defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>, - sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>, +defm RSQRT : sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>, sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps, int_x86_avx_rsqrt_ps_256, SSE_SQRTP>; -defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>, - sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>, +defm RCP : sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>, sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, int_x86_avx_rcp_ps_256, SSE_RCPP>; +let Predicates = [HasAVX] in { + // Square root. + defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt">, + sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG; + + // Reciprocal approximations. Note that these typically require refinement + // in order to obtain suitable precision. + defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG; + defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG; +} + def : Pat<(f32 (fsqrt FR32:$src)), (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>; def : Pat<(f32 (fsqrt (load addr:$src))), @@ -3239,11 +3186,49 @@ let Predicates = [HasAVX] in { (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; } +// Square root. +defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss, + SSE_SQRTS>, + sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd, + SSE_SQRTS>; + +/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. +multiclass sse1_fp_unop_rw opc, string OpcodeStr, SDNode OpNode, + OpndItins itins> { + def SSr : SSI; + // For scalar unary operations, fold a load into the operation + // only in OptForSize mode. It eliminates an instruction, but it also + // eliminates a whole-register clobber (the load), so it introduces a + // partial register update condition. + def SSm : I, XS, + Requires<[UseSSE1, OptForSize]>; + let Constraints = "$src1 = $dst" in { + def SSr_Int : SSI; + let mayLoad = 1, hasSideEffects = 0 in + def SSm_Int : SSI; + } +} + // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. +defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>; let Predicates = [UseSSE1] in { def : Pat<(int_x86_sse_rsqrt_ss VR128:$src), (RSQRTSSr_Int VR128:$src, VR128:$src)>; +} + +defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>; +let Predicates = [UseSSE1] in { def : Pat<(int_x86_sse_rcp_ss VR128:$src), (RCPSSr_Int VR128:$src, VR128:$src)>; } -- cgit v1.2.3-18-g5258 From 3cca7df7c76a668e3ffd0ca20e4324df897ede87 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 2 Jan 2013 07:27:49 +0000 Subject: Merge SSE and AVX instruction definitions for PSHUFD/PSHUFHW/PSHUFLW. 
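PSHUFD, PSHUFHW, and PSHUFLW all select lanes with an 8-bit immediate,
two bits per destination element. A minimal C sketch of the PSHUFD
operation these patterns match, assuming the SSE2 intrinsic (the helper
name is illustrative, not part of this patch):

#include <emmintrin.h>

/* Each 2-bit field of the immediate names the source lane for one
   destination lane: _MM_SHUFFLE(0, 1, 2, 3) places source lanes
   3, 2, 1, 0 into positions 0..3, reversing the four i32 elements. */
static __m128i reverse_epi32(__m128i v) {
  return _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 1, 2, 3));
}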
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171355 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 123 +++++++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 62 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index a53ad61af8..661023ee4f 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3892,78 +3892,77 @@ defm PACKUSWB : PDI_binop_all_int<0x67, "packuswb", int_x86_sse2_packuswb_128, //===---------------------------------------------------------------------===// let ExeDomain = SSEPackedInt in { -multiclass sse2_pshuffle { -def ri : Ii8<0x70, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, (vt (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>; -def mi : Ii8<0x70, MRMSrcMem, - (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, - (vt (OpNode (bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], - IIC_SSE_PSHUF>; -} - -multiclass sse2_pshuffle_y { -def Yri : Ii8<0x70, MRMSrcReg, - (outs VR256:$dst), (ins VR256:$src1, i8imm:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (vt (OpNode VR256:$src1, (i8 imm:$src2))))]>; -def Ymi : Ii8<0x70, MRMSrcMem, - (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, - (vt (OpNode (bitconvert (memopv4i64 addr:$src1)), - (i8 imm:$src2))))]>; -} -} // ExeDomain = SSEPackedInt - +multiclass sse2_pshuffle { let Predicates = [HasAVX] in { - let AddedComplexity = 5 in - defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, X86PShufd>, TB, OpSize, VEX; - - // SSE2 with ImmT == Imm8 and XS prefix. - defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, X86PShufhw>, XS, VEX; - - // SSE2 with ImmT == Imm8 and XD prefix. 
- defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, X86PShuflw>, XD, VEX; - - def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))), - (VPSHUFDmi addr:$src1, imm:$imm)>; - def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), - (VPSHUFDri VR128:$src1, imm:$imm)>; + def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, i8imm:$src2), + !strconcat(!strconcat("v", OpcodeStr), + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, + (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], + IIC_SSE_PSHUF>, VEX; + def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), + (ins i128mem:$src1, i8imm:$src2), + !strconcat(!strconcat("v", OpcodeStr), + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, + (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), + (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX; } let Predicates = [HasAVX2] in { - defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, X86PShufd>, - TB, OpSize, VEX,VEX_L; - defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, X86PShufhw>, - XS, VEX, VEX_L; - defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>, - XD, VEX, VEX_L; + def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, i8imm:$src2), + !strconcat(!strconcat("v", OpcodeStr), + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, + (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))], + IIC_SSE_PSHUF>, VEX, VEX_L; + def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), + (ins i256mem:$src1, i8imm:$src2), + !strconcat(!strconcat("v", OpcodeStr), + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, + (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)), + (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L; } let Predicates = [UseSSE2] in { - let AddedComplexity = 5 in - defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, X86PShufd>, TB, OpSize; + def ri : Ii8<0x70, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, + (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], + IIC_SSE_PSHUF>; + def mi : Ii8<0x70, MRMSrcMem, + (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, + (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), + (i8 imm:$src2))))], IIC_SSE_PSHUF>; +} +} +} // ExeDomain = SSEPackedInt - // SSE2 with ImmT == Imm8 and XS prefix. - defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, X86PShufhw>, XS; +defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd>, TB, OpSize; +defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw>, XS; +defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw>, XD; - // SSE2 with ImmT == Imm8 and XD prefix. 
- defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, X86PShuflw>, XD; +let Predicates = [HasAVX] in { + def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))), + (VPSHUFDmi addr:$src1, imm:$imm)>; + def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), + (VPSHUFDri VR128:$src1, imm:$imm)>; +} - def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))), - (PSHUFDmi addr:$src1, imm:$imm)>; - def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), - (PSHUFDri VR128:$src1, imm:$imm)>; +let Predicates = [UseSSE2] in { + def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))), + (PSHUFDmi addr:$src1, imm:$imm)>; + def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), + (PSHUFDri VR128:$src1, imm:$imm)>; } //===---------------------------------------------------------------------===// -- cgit v1.2.3-18-g5258 From 3af932322859285988a4c53551540fc0658b2116 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 2 Jan 2013 08:00:39 +0000 Subject: Merge SSE and AVX instruction definitions for scalar forms of SQRT, RSQRT, and RCP. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171356 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 179 +++++++++++++++++++++++------------------- 1 file changed, 97 insertions(+), 82 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 661023ee4f..ec7d20b831 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2936,6 +2936,26 @@ def SSE_RCPS : OpndItins< /// sse1_fp_unop_s - SSE1 unops in scalar form. multiclass sse1_fp_unop_s opc, string OpcodeStr, SDNode OpNode, Intrinsic F32Int, OpndItins itins> { +let Predicates = [HasAVX], hasSideEffects = 0 in { + def V#NAME#SSr : SSI, VEX_4V, VEX_LIG; + let mayLoad = 1 in { + def V#NAME#SSm : SSI, VEX_4V, VEX_LIG; + def V#NAME#SSm_Int : SSI, VEX_4V, VEX_LIG; + } +} + def SSr : SSI; @@ -2955,19 +2975,50 @@ multiclass sse1_fp_unop_s opc, string OpcodeStr, [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>; } -/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form. -multiclass sse1_fp_unop_s_avx opc, string OpcodeStr> { - def SSr : SSI; +/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. +multiclass sse1_fp_unop_rw opc, string OpcodeStr, SDNode OpNode, + OpndItins itins> { +let Predicates = [HasAVX], hasSideEffects = 0 in { + def V#NAME#SSr : SSI, VEX_4V, VEX_LIG; let mayLoad = 1 in { - def SSm : SSI; - def SSm_Int : SSI; + def V#NAME#SSm : SSI, VEX_4V, VEX_LIG; + def V#NAME#SSm_Int : SSI, VEX_4V, VEX_LIG; + } +} + + def SSr : SSI; + // For scalar unary operations, fold a load into the operation + // only in OptForSize mode. It eliminates an instruction, but it also + // eliminates a whole-register clobber (the load), so it introduces a + // partial register update condition. + def SSm : I, XS, + Requires<[UseSSE1, OptForSize]>; + let Constraints = "$src1 = $dst" in { + def SSr_Int : SSI; + let mayLoad = 1, hasSideEffects = 0 in + def SSm_Int : SSI; } } @@ -3046,6 +3097,26 @@ let Predicates = [HasAVX] in { /// sse2_fp_unop_s - SSE2 unops in scalar form. 
multiclass sse2_fp_unop_s opc, string OpcodeStr, SDNode OpNode, Intrinsic F64Int, OpndItins itins> { +let Predicates = [HasAVX], hasSideEffects = 0 in { + def V#NAME#SDr : SDI, VEX_4V, VEX_LIG; + let mayLoad = 1 in { + def V#NAME#SDm : SDI, VEX_4V, VEX_LIG; + def V#NAME#SDm_Int : SDI, VEX_4V, VEX_LIG; + } +} + def SDr : SDI; @@ -3062,24 +3133,7 @@ multiclass sse2_fp_unop_s opc, string OpcodeStr, [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>; } -/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form. -let hasSideEffects = 0 in -multiclass sse2_fp_unop_s_avx opc, string OpcodeStr> { - def SDr : SDI; - let mayLoad = 1 in { - def SDm : SDI; - def SDm_Int : SDI; - } -} - -/// sse2_fp_unop_p_new - SSE2 unops in vector forms. +/// sse2_fp_unop_p - SSE2 unops in vector forms. multiclass sse2_fp_unop_p opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { let Predicates = [HasAVX] in { @@ -3113,26 +3167,25 @@ let Predicates = [HasAVX] in { [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>; } -defm SQRT : sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>, +// Square root. +defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss, + SSE_SQRTS>, + sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>, + sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd, + SSE_SQRTS>, sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>; -defm RSQRT : sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>, + +// Reciprocal approximations. Note that these typically require refinement +// in order to obtain suitable precision. +defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>, + sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>, sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps, int_x86_avx_rsqrt_ps_256, SSE_SQRTP>; -defm RCP : sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>, +defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>, + sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>, sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, int_x86_avx_rcp_ps_256, SSE_RCPP>; -let Predicates = [HasAVX] in { - // Square root. - defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt">, - sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG; - - // Reciprocal approximations. Note that these typically require refinement - // in order to obtain suitable precision. - defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG; - defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG; -} - def : Pat<(f32 (fsqrt FR32:$src)), (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>; def : Pat<(f32 (fsqrt (load addr:$src))), @@ -3186,49 +3239,11 @@ let Predicates = [HasAVX] in { (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; } -// Square root. -defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss, - SSE_SQRTS>, - sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd, - SSE_SQRTS>; - -/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. -multiclass sse1_fp_unop_rw opc, string OpcodeStr, SDNode OpNode, - OpndItins itins> { - def SSr : SSI; - // For scalar unary operations, fold a load into the operation - // only in OptForSize mode. It eliminates an instruction, but it also - // eliminates a whole-register clobber (the load), so it introduces a - // partial register update condition. - def SSm : I, XS, - Requires<[UseSSE1, OptForSize]>; - let Constraints = "$src1 = $dst" in { - def SSr_Int : SSI; - let mayLoad = 1, hasSideEffects = 0 in - def SSm_Int : SSI; - } -} - // Reciprocal approximations. 
Note that these typically require refinement // in order to obtain suitable precision. -defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>; let Predicates = [UseSSE1] in { def : Pat<(int_x86_sse_rsqrt_ss VR128:$src), (RSQRTSSr_Int VR128:$src, VR128:$src)>; -} - -defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>; -let Predicates = [UseSSE1] in { def : Pat<(int_x86_sse_rcp_ss VR128:$src), (RCPSSr_Int VR128:$src, VR128:$src)>; } -- cgit v1.2.3-18-g5258 From 0b8c9a80f20772c3793201ab5b251d3520b9cea3 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Wed, 2 Jan 2013 11:36:10 +0000 Subject: Move all of the header files which are involved in modelling the LLVM IR into their new header subdirectory: include/llvm/IR. This matches the directory structure of lib, and begins to correct a long standing point of file layout clutter in LLVM. There are still more header files to move here, but I wanted to handle them in separate commits to make tracking what files make sense at each layer easier. The only really questionable files here are the target intrinsic tablegen files. But that's a battle I'd rather not fight today. I've updated both CMake and Makefile build systems (I think, and my tests think, but I may have missed something). I've also re-sorted the includes throughout the project. I'll be committing updates to Clang, DragonEgg, and Polly momentarily. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171366 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/TargetInfo/X86TargetInfo.cpp | 2 +- lib/Target/X86/X86AsmPrinter.cpp | 8 ++++---- lib/Target/X86/X86CodeEmitter.cpp | 2 +- lib/Target/X86/X86FastISel.cpp | 14 +++++++------- lib/Target/X86/X86FloatingPoint.cpp | 2 +- lib/Target/X86/X86FrameLowering.cpp | 4 ++-- lib/Target/X86/X86ISelDAGToDAG.cpp | 6 +++--- lib/Target/X86/X86ISelLowering.cpp | 18 +++++++++--------- lib/Target/X86/X86InstrInfo.cpp | 4 ++-- lib/Target/X86/X86JITInfo.cpp | 2 +- lib/Target/X86/X86JITInfo.h | 2 +- lib/Target/X86/X86MCInstLower.cpp | 2 +- lib/Target/X86/X86RegisterInfo.cpp | 6 +++--- lib/Target/X86/X86SelectionDAGInfo.cpp | 2 +- lib/Target/X86/X86Subtarget.cpp | 2 +- lib/Target/X86/X86Subtarget.h | 2 +- lib/Target/X86/X86TargetMachine.h | 2 +- 17 files changed, 40 insertions(+), 40 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp index 52a67f763b..815d23588f 100644 --- a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp +++ b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "X86.h" -#include "llvm/Module.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index d275e1dac2..5b3e0ba22d 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -20,26 +20,26 @@ #include "X86TargetMachine.h" #include "llvm/ADT/SmallString.h" #include "llvm/Assembly/Writer.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include 
"llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Module.h" #include "llvm/Support/COFF.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Type.h" using namespace llvm; //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index df578fe098..bc77334347 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -25,7 +25,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/LLVMContext.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 9d2dac2c9b..5facb7be57 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -19,19 +19,19 @@ #include "X86RegisterInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/GlobalAlias.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Operator.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 16197e09e9..0585b43a46 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -36,7 +36,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/InlineAsm.h" +#include "llvm/IR/InlineAsm.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index f70be1d2c1..420aeb85d3 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -23,8 +23,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DataLayout.h" -#include "llvm/Function.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index c9918b900d..0b8e1c5fa3 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -25,15 +25,15 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include 
"llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Type.h" using namespace llvm; STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 401b9b0346..5dae9dca95 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -23,7 +23,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/VariadicFunction.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -31,14 +30,15 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalAlias.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 16c6b3d73c..17714acd86 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -24,8 +24,8 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/LLVMContext.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/CommandLine.h" diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 764aa5d4f2..cca391f647 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -16,7 +16,7 @@ #include "X86Relocations.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Valgrind.h" diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h index 7b0f3434d0..f916327378 100644 --- a/lib/Target/X86/X86JITInfo.h +++ b/lib/Target/X86/X86JITInfo.h @@ -15,7 +15,7 @@ #define X86JITINFO_H #include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Target/TargetJITInfo.h" namespace llvm { diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 07740f16e8..5a1e1b8dc9 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -17,6 +17,7 @@ #include "X86COFFMachineModuleInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" 
+#include "llvm/IR/Type.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -26,7 +27,6 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/Mangler.h" -#include "llvm/Type.h" using namespace llvm; namespace { diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 07c0d148aa..58064b8695 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -28,8 +28,9 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -37,7 +38,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Type.h" #define GET_REGINFO_TARGET_DESC #include "X86GenRegisterInfo.inc" diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index af42b7c59e..757e8c70a4 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -14,7 +14,7 @@ #define DEBUG_TYPE "x86-selectiondag-info" #include "X86TargetMachine.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/DerivedTypes.h" +#include "llvm/IR/DerivedTypes.h" using namespace llvm; X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) : diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index b4d554a64e..d493b78752 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -14,7 +14,7 @@ #define DEBUG_TYPE "subtarget" #include "X86Subtarget.h" #include "X86InstrInfo.h" -#include "llvm/GlobalValue.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Host.h" diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 387edc5698..44f38a1a91 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -15,7 +15,7 @@ #define X86SUBTARGET_H #include "llvm/ADT/Triple.h" -#include "llvm/CallingConv.h" +#include "llvm/IR/CallingConv.h" #include "llvm/Target/TargetSubtargetInfo.h" #include diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 57ff1f0d7b..a0749f0892 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -21,7 +21,7 @@ #include "X86JITInfo.h" #include "X86SelectionDAGInfo.h" #include "X86Subtarget.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetTransformImpl.h" -- cgit v1.2.3-18-g5258 From e3b2489f4f3f28d65c0bc491a7d9e3d295f35723 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 2 Jan 2013 17:41:03 +0000 Subject: AVX: Fix a bug in WidenMaskArithmetic. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171398 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5dae9dca95..4afc0d8197 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -15974,7 +15974,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, } // Generate the wide operation. - SDValue Op = DAG.getNode(N->getOpcode(), DL, WideVT, N0, N1); + SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, WideVT, N0, N1); unsigned Opcode = N->getOpcode(); switch (Opcode) { case ISD::ANY_EXTEND: -- cgit v1.2.3-18-g5258 From 5bf3a28b36c5d95ad3732b749db651630027b09c Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Wed, 2 Jan 2013 21:20:15 +0000 Subject: Adds missing aliases for fcom and fcomp instructions without arguments. Patch by Michael M Kuperstein! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171414 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.td | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index d0d236ab42..4522d895a4 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1910,6 +1910,8 @@ def : InstAlias<"fmulp", (MUL_FPrST0 ST1)>; def : InstAlias<"fdivp", (DIVR_FPrST0 ST1)>; def : InstAlias<"fdivrp", (DIV_FPrST0 ST1)>; def : InstAlias<"fxch", (XCH_F ST1)>; +def : InstAlias<"fcom", (COM_FST0r ST1)>; +def : InstAlias<"fcomp", (COMP_FST0r ST1)>; def : InstAlias<"fcomi", (COM_FIr ST1)>; def : InstAlias<"fcompi", (COM_FIPr ST1)>; def : InstAlias<"fucom", (UCOM_Fr ST1)>; -- cgit v1.2.3-18-g5258 From 82860f63e1678077fe665c21179b9df47fd313bb Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 3 Jan 2013 02:34:09 +0000 Subject: Add a subtype parameter to VTTI::getShuffleCost In order to cost subvector insertion and extraction, we need to know the type of the subvector being extracted. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171453 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 5 +++-- lib/Target/X86/X86ISelLowering.h | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4afc0d8197..eca63f80ae 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -18303,10 +18303,11 @@ unsigned X86VectorTargetTransformInfo::getCastInstrCost(unsigned Opcode, unsigned X86VectorTargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Tp, - int Index) const { + int Index, + Type *SubTp) const { // We only estimate the cost of reverse shuffles. 
if (Kind != Reverse) - return VectorTargetTransformImpl::getShuffleCost(Kind, Tp, Index); + return VectorTargetTransformImpl::getShuffleCost(Kind, Tp, Index, SubTp); std::pair LT = getTypeLegalizationCost(Tp); unsigned Cost = 1; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 1b4b5eb65d..2e2fc2a234 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -974,7 +974,8 @@ namespace llvm { virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const; - unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index) const; + unsigned getShuffleCost(ShuffleKind Kind, + Type *Tp, int Index, Type *SubTp) const; }; } -- cgit v1.2.3-18-g5258 From 56bc0ab09591b0dec6fcc65f8816b7c4764a0c6c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 3 Jan 2013 06:40:20 +0000 Subject: Mark DIV/IDIV instructions hasSideEffects=1 because they can trap when dividing by 0. This is needed to keep early if conversion from moving them across basic blocks. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171461 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrArithmetic.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index d56763ea9d..3540fc3125 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -266,7 +266,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 // unsigned division/remainder -let hasSideEffects = 0 in { +let hasSideEffects = 1 in { // so that we don't speculatively execute let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "div{b}\t$src", [], IIC_DIV8_REG>; -- cgit v1.2.3-18-g5258 From e33a8b8c2f003b703b0af32effd8ef25477c2165 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Thu, 3 Jan 2013 08:18:30 +0000 Subject: Revert "Mark DIV/IDIV instructions hasSideEffects=1 because they can trap when dividing by 0. This is needed to keep early if conversion from moving them across basic blocks." 
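The reverted change addressed a real hazard: x86 DIV and IDIV raise #DE
on a zero divisor, so an if-converted divide can trap on a path the
source program never takes. A minimal C sketch of the pattern at stake
(the function is illustrative):

/* Early if-conversion must not speculate the division above the
   guard; if it does, the b == 0 path executes IDIV and traps even
   though the program never divides by zero. */
int guarded_div(int a, int b) {
  if (b == 0)
    return 0;
  return a / b;
}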
This reverts commit r171461 since it breaks the following tests: Clang :: Analysis/outofbound-notwork.c Clang :: Analysis/string-fail.c Clang :: CXX/basic/basic.lookup/basic.lookup.qual/p6-0x.cpp Clang :: CXX/basic/basic.lookup/basic.lookup.unqual/p15.cpp Clang :: CXX/dcl.dcl/dcl.spec/dcl.fct.spec/p4.cpp Clang :: CXX/dcl.dcl/dcl.spec/dcl.stc/p10.cpp Clang :: CXX/temp/temp.param/p14.cpp Clang :: CXX/temp/temp.res/temp.dep.res/temp.point/p1.cpp Clang :: CodeGen/2009-02-13-zerosize-union-field-ppc.c Clang :: CodeGen/blocks-2.c Clang :: CodeGen/libcalls-d.c Clang :: CodeGen/libcalls-ld.c Clang :: CodeGenCXX/conversion-function.cpp Clang :: CodeGenCXX/debug-info-limit-type.cpp Clang :: CodeGenCXX/inheriting-constructor.cpp Clang :: FixIt/fixit-errors.c Clang :: FixIt/fixit-pmem.cpp Clang :: Modules/namespaces.cpp Clang :: PCH/changed-files.c Clang :: PCH/pr4489.c Clang :: PCH/source-manager-stack.c Clang :: Parser/cxx-ambig-decl-expr-xfail.cpp Clang :: SemaCXX/switch-implicit-fallthrough-cxx98.cpp Clang :: SemaTemplate/instantiate-function-1.mm git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171466 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrArithmetic.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 3540fc3125..d56763ea9d 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -266,7 +266,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 // unsigned division/remainder -let hasSideEffects = 1 in { // so that we don't speculatively execute +let hasSideEffects = 0 in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "div{b}\t$src", [], IIC_DIV8_REG>; -- cgit v1.2.3-18-g5258 From ab7032090871abf6aeed86b2c4b836e97771d234 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Thu, 3 Jan 2013 08:48:33 +0000 Subject: Simplified TRUNCATE operation that comes after SETCC. It is possible since SETCC result is 0 or -1. Added a test. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171467 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index eca63f80ae..9a553d61bf 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -14661,12 +14661,29 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, return EltsFromConsecutiveLoads(VT, Elts, dl, DAG); } -/// PerformTruncateCombine - Converts truncate operation to -/// a sequence of vector shuffle operations. -/// It is possible when we truncate 256-bit vector to 128-bit vector +/// PerformTruncateCombine - In some cases a sequence with "truncate" +/// operation may be simplified. static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + if (DCI.isBeforeLegalize() || !VT.isVector()) + return SDValue(); + + SDValue In = N->getOperand(0); + // Optimize the sequence setcc -> truncate + if (In.getOpcode() == ISD::SETCC) { + DebugLoc DL = N->getDebugLoc(); + EVT InVT = In.getValueType(); + + // The vector element is all ones or all zero. Just take a half of it. 
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(), + InVT.getVectorNumElements()/2); + SDValue HalfVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, In, + DAG.getIntPtrConstant(0)); + assert(HalfVT.getSizeInBits() == VT.getSizeInBits()); + return DAG.getNode(ISD::BITCAST, DL, VT, HalfVec); + } return SDValue(); } -- cgit v1.2.3-18-g5258 From e12bf1875481b02d07b6ce9c153ec3410068e234 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 4 Jan 2013 17:35:21 +0000 Subject: Revert revision: 171467. This transformation is incorrect and makes some tests fail. Original message: Simplified TRUNCATE operation that comes after SETCC. It is possible since SETCC result is 0 or -1. Added a test. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171468 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9a553d61bf..eca63f80ae 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -14661,29 +14661,12 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, return EltsFromConsecutiveLoads(VT, Elts, dl, DAG); } -/// PerformTruncateCombine - In some cases a sequence with "truncate" -/// operation may be simplified. +/// PerformTruncateCombine - Converts truncate operation to +/// a sequence of vector shuffle operations. +/// It is possible when we truncate 256-bit vector to 128-bit vector static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { - EVT VT = N->getValueType(0); - if (DCI.isBeforeLegalize() || !VT.isVector()) - return SDValue(); - - SDValue In = N->getOperand(0); - // Optimize the sequence setcc -> truncate - if (In.getOpcode() == ISD::SETCC) { - DebugLoc DL = N->getDebugLoc(); - EVT InVT = In.getValueType(); - - // The vector element is all ones or all zero. Just take a half of it. - EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(), - InVT.getVectorNumElements()/2); - SDValue HalfVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, In, - DAG.getIntPtrConstant(0)); - assert(HalfVT.getSizeInBits() == VT.getSizeInBits()); - return DAG.getNode(ISD::BITCAST, DL, VT, HalfVec); - } return SDValue(); } -- cgit v1.2.3-18-g5258 From e503319874f57ab4a0354521b03a71cf8e07b866 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 4 Jan 2013 17:48:25 +0000 Subject: LoopVectorizer: 1. Add code to estimate register pressure. 2. Add code to select the unroll factor based on register pressure. 3. Add bits to TargetTransformInfo to provide the number of registers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171469 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 7 +++++++ lib/Target/X86/X86ISelLowering.h | 2 ++ 2 files changed, 9 insertions(+) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index eca63f80ae..f482ac9846 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -18115,6 +18115,13 @@ X86ScalarTargetTransformImpl::getPopcntHwSupport(unsigned TyWidth) const { return ST.hasSSE41() ? 
Fast : None; } +unsigned X86VectorTargetTransformInfo::getNumberOfRegisters(bool Vector) const { + const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget(); + if (ST.is64Bit()) + return 16; + return 8; +} + unsigned X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 2e2fc2a234..86b7764c13 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -959,6 +959,8 @@ namespace llvm { explicit X86VectorTargetTransformInfo(const TargetLowering *TL) : VectorTargetTransformImpl(TL) {} + virtual unsigned getNumberOfRegisters(bool Vector) const; + virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, -- cgit v1.2.3-18-g5258 From dd30b471750aca5c652873f9a8972df162b7e5eb Mon Sep 17 00:00:00 2001 From: Preston Gurd Date: Fri, 4 Jan 2013 20:54:54 +0000 Subject: The current Intel Atom microarchitecture has a feature whereby when a function returns early then it is slightly faster to execute a sequence of NOP instructions to wait until the return address is ready, as opposed to simply stalling on the ret instruction until the return address is ready. When compiling for X86 Atom only, this patch will run a pass, called "X86PadShortFunction" which will add NOP instructions where less than four cycles elapse between function entry and return. It includes tests. Patch by Andy Zhang. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171524 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/CMakeLists.txt | 1 + lib/Target/X86/X86.h | 5 + lib/Target/X86/X86.td | 9 +- lib/Target/X86/X86PadShortFunction.cpp | 184 +++++++++++++++++++++++++++++++++ lib/Target/X86/X86Subtarget.cpp | 1 + lib/Target/X86/X86Subtarget.h | 5 + lib/Target/X86/X86TargetMachine.cpp | 4 + 7 files changed, 206 insertions(+), 3 deletions(-) create mode 100644 lib/Target/X86/X86PadShortFunction.cpp (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 19912cc6bc..140c80dee4 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -25,6 +25,7 @@ set(sources X86JITInfo.cpp X86MCInstLower.cpp X86MachineFunctionInfo.cpp + X86PadShortFunction.cpp X86RegisterInfo.cpp X86SelectionDAGInfo.cpp X86Subtarget.cpp diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 1e7b98d94f..88dbb6d67a 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -63,6 +63,11 @@ FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, /// FunctionPass *createEmitX86CodeToMemory(); +/// createX86PadShortFunctions - Return a pass that pads short functions +/// with NOOPs. This will prevent a stall when returning from the function +/// on the Atom. 
+FunctionPass *createX86PadShortFunctions(); + } // End llvm namespace #endif diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index b9d8cf7645..3ab2899365 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -123,8 +123,11 @@ def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true", def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb", - "HasSlowDivide", "true", - "Use small divide for positive values less than 256">; + "HasSlowDivide", "true", + "Use small divide for positive values less than 256">; +def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions", + "PadShortFunctions", "true", + "Pad short functions">; //===----------------------------------------------------------------------===// // X86 processors supported. @@ -167,7 +170,7 @@ def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>; def : AtomProc<"atom", [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B, FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP, - FeatureSlowDivide]>; + FeatureSlowDivide, FeaturePadShortFunctions]>; // "Arrandale" along with corei3 and corei5 def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem, FeatureFastUAMem, diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp new file mode 100644 index 0000000000..05f8a62a75 --- /dev/null +++ b/lib/Target/X86/X86PadShortFunction.cpp @@ -0,0 +1,184 @@ +//===-------- X86PadShortFunction.cpp - pad short functions -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the pass which will pad short functions to prevent +// a stall if a function returns before the return address is ready. This +// is needed for some Intel Atom processors. 
+//
+//===----------------------------------------------------------------------===//
+
+#include <map>
+#include <algorithm>
+
+#define DEBUG_TYPE "x86-pad-short-functions"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+STATISTIC(NumBBsPadded, "Number of basic blocks padded");
+
+namespace {
+  struct PadShortFunc : public MachineFunctionPass {
+    static char ID;
+    PadShortFunc() : MachineFunctionPass(ID)
+                   , Threshold(4)
+    {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual const char *getPassName() const
+    {
+      return "X86 Atom pad short functions";
+    }
+
+  private:
+    bool addPadding(MachineFunction &MF,
+                    MachineBasicBlock &MBB,
+                    MachineBasicBlock::iterator &MBBI,
+                    unsigned int NOOPsToAdd);
+
+    void findReturn(MachineFunction &MF,
+                    MachineBasicBlock &MBB,
+                    unsigned int Cycles);
+
+    bool cyclesUntilReturn(MachineFunction &MF,
+                           MachineBasicBlock &MBB,
+                           unsigned int &Cycles,
+                           MachineBasicBlock::iterator *Location = 0);
+
+    const unsigned int Threshold;
+    std::map<int, unsigned int> ReturnBBs;
+  };
+
+  char PadShortFunc::ID = 0;
+}
+
+FunctionPass *llvm::createX86PadShortFunctions() {
+  return new PadShortFunc();
+}
+
+/// runOnMachineFunction - Loop over all of the basic blocks, inserting
+/// NOOP instructions before early exits.
+bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
+  // Process all basic blocks.
+  ReturnBBs.clear();
+
+  // Search through basic blocks and mark the ones that have early returns.
+  findReturn(MF, *MF.begin(), 0);
+
+  int BBNum;
+  MachineBasicBlock::iterator ReturnLoc;
+  MachineBasicBlock *MBB;
+
+  unsigned int Cycles = 0;
+  unsigned int BBCycles;
+
+  // Pad the identified basic blocks with NOOPs.
+  for (std::map<int, unsigned int>::iterator I = ReturnBBs.begin();
+       I != ReturnBBs.end(); ++I) {
+    BBNum = I->first;
+    Cycles = I->second;
+
+    if (Cycles < Threshold) {
+      MBB = MF.getBlockNumbered(BBNum);
+      if (!cyclesUntilReturn(MF, *MBB, BBCycles, &ReturnLoc))
+        continue;
+
+      addPadding(MF, *MBB, ReturnLoc, Threshold - Cycles);
+      NumBBsPadded++;
+    }
+  }
+
+  return false;
+}
+
+/// findReturn - Starting at MBB, follow control flow and add all
+/// basic blocks that contain a return to ReturnBBs.
+void PadShortFunc::findReturn(MachineFunction &MF,
+                              MachineBasicBlock &MBB,
+                              unsigned int Cycles)
+{
+  // If this BB has a return, note how many cycles it takes to get there.
+  bool hasReturn = cyclesUntilReturn(MF, MBB, Cycles);
+  if (Cycles >= Threshold)
+    return;
+
+  if (hasReturn) {
+    int BBNum = MBB.getNumber();
+    ReturnBBs[BBNum] = std::max(ReturnBBs[BBNum], Cycles);
+
+    return;
+  }
+
+  // Follow branches in BB and look for returns.
+  for (MachineBasicBlock::succ_iterator I = MBB.succ_begin();
+       I != MBB.succ_end(); ++I) {
+    findReturn(MF, **I, Cycles);
+  }
+}
+
+/// cyclesUntilReturn - if the MBB has a return instruction, set Location
+/// to the instruction and return true. Return false otherwise.
+/// Cycles will be incremented by the number of cycles taken to reach the
+/// return or the end of the BB, whichever occurs first.
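+/// Cycle counts are estimated with TII.getInstrLatency using the target's
+/// instruction itinerary data.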
+bool PadShortFunc::cyclesUntilReturn(MachineFunction &MF,
+                                     MachineBasicBlock &MBB,
+                                     unsigned int &Cycles,
+                                     MachineBasicBlock::iterator *Location)
+{
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  const TargetMachine &Target = MF.getTarget();
+
+  for (MachineBasicBlock::iterator MBBI = MBB.begin(); MBBI != MBB.end();
+       ++MBBI) {
+    MachineInstr *MI = MBBI;
+    // Mark basic blocks with a return instruction. Calls to other functions
+    // do not count because the called function will be padded, if necessary.
+    if (MI->isReturn() && !MI->isCall()) {
+      if (Location)
+        *Location = MBBI;
+      return true;
+    }
+
+    Cycles += TII.getInstrLatency(Target.getInstrItineraryData(), MI);
+  }
+
+  return false;
+}
+
+/// addPadding - Add the given number of NOOP instructions to the function
+/// right before the return at MBBI.
+bool PadShortFunc::addPadding(MachineFunction &MF,
+                              MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator &MBBI,
+                              unsigned int NOOPsToAdd)
+{
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+  DebugLoc DL = MBBI->getDebugLoc();
+
+  while (NOOPsToAdd-- > 0) {
+    // Since Atom has two instruction execution ports, the code emits two
+    // noops, which will be executed in parallel during one cycle.
+    BuildMI(MBB, MBBI, DL, TII.get(X86::NOOP));
+    BuildMI(MBB, MBBI, DL, TII.get(X86::NOOP));
+  }
+
+  return true;
+}
+
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index d493b78752..53c28f4fce 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -350,6 +350,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
   , UseLeaForSP(false)
   , HasSlowDivide(false)
   , PostRAScheduler(false)
+  , PadShortFunctions(false)
   , stackAlignment(4)
   // FIXME: this is a known good value for Yonah. How about others?
   , MaxInlineSizeThreshold(128)
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 44f38a1a91..080f4cfeca 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -146,6 +146,10 @@ protected:
   /// PostRAScheduler - True if using post-register-allocation scheduler.
   bool PostRAScheduler;
 
+  /// PadShortFunctions - True if short functions should be padded to prevent
+  /// a stall when returning too early.
+  bool PadShortFunctions;
+
   /// stackAlignment - The minimum alignment known to hold of the stack frame on
   /// entry to the function and which must be maintained by every function.
   unsigned stackAlignment;
@@ -231,6 +235,7 @@ public:
   bool hasCmpxchg16b() const { return HasCmpxchg16b; }
   bool useLeaForSP() const { return UseLeaForSP; }
   bool hasSlowDivide() const { return HasSlowDivide; }
+  bool padShortFunctions() const { return PadShortFunctions; }
 
   bool isAtom() const { return X86ProcFamily == IntelAtom; }
 
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index ea99796f35..8393f7e91e 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -190,6 +190,10 @@ bool X86PassConfig::addPreEmitPass() {
     addPass(createX86IssueVZeroUpperPass());
     ShouldPrint = true;
   }
+  if (getX86Subtarget().padShortFunctions()) {
+    addPass(createX86PadShortFunctions());
+    ShouldPrint = true;
+  }
 
   return ShouldPrint;
 }
-- 
cgit v1.2.3-18-g5258


From 18d0f12aa8761fab3ac83376b14d0b16b4e5e934 Mon Sep 17 00:00:00 2001
From: Jakub Staszak
Date: Fri, 4 Jan 2013 23:01:26 +0000
Subject: Move 'break' to the right place to prevent fallthru.
There is no test case because the conditions in the next case prevent it
from doing anything nasty.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171549 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelDAGToDAG.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 0b8e1c5fa3..c1eaa0e654 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1037,8 +1037,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
       AM.IndexReg = ShVal;
       return false;
     }
-    break;
   }
+  break;
 
   case ISD::SRL: {
     // Scale must not be used already.
-- 
cgit v1.2.3-18-g5258


From 5d1f5c17377e56d88a525cf82d02e6e5df254580 Mon Sep 17 00:00:00 2001
From: Nadav Rotem
Date: Sat, 5 Jan 2013 05:42:48 +0000
Subject: Revert revision 171524. Original message:

 URL: http://llvm.org/viewvc/llvm-project?rev=171524&view=rev
 Log:
 The current Intel Atom microarchitecture has a feature whereby
 when a function returns early then it is slightly faster to execute
 a sequence of NOP instructions to wait until the return address is ready,
 as opposed to simply stalling on the ret instruction until the
 return address is ready.

 When compiling for X86 Atom only, this patch will run a pass, called
 "X86PadShortFunction" which will add NOP instructions where less than four
 cycles elapse between function entry and return.

 It includes tests.

 Patch by Andy Zhang.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171603 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/CMakeLists.txt          |   1 -
 lib/Target/X86/X86.h                   |   5 -
 lib/Target/X86/X86.td                  |   9 +-
 lib/Target/X86/X86PadShortFunction.cpp | 184 ---------------------------------
 lib/Target/X86/X86Subtarget.cpp        |   1 -
 lib/Target/X86/X86Subtarget.h          |   5 -
 lib/Target/X86/X86TargetMachine.cpp    |   4 -
 7 files changed, 3 insertions(+), 206 deletions(-)
 delete mode 100644 lib/Target/X86/X86PadShortFunction.cpp
(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 140c80dee4..19912cc6bc 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -25,7 +25,6 @@ set(sources
   X86JITInfo.cpp
   X86MCInstLower.cpp
   X86MachineFunctionInfo.cpp
-  X86PadShortFunction.cpp
   X86RegisterInfo.cpp
   X86SelectionDAGInfo.cpp
   X86Subtarget.cpp
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 88dbb6d67a..1e7b98d94f 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -63,11 +63,6 @@ FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
 ///
 FunctionPass *createEmitX86CodeToMemory();
 
-/// createX86PadShortFunctions - Return a pass that pads short functions
-/// with NOOPs. This will prevent a stall when returning from the function
-/// on the Atom.
-FunctionPass *createX86PadShortFunctions(); - } // End llvm namespace #endif diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 3ab2899365..b9d8cf7645 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -123,11 +123,8 @@ def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true", def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb", - "HasSlowDivide", "true", - "Use small divide for positive values less than 256">; -def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions", - "PadShortFunctions", "true", - "Pad short functions">; + "HasSlowDivide", "true", + "Use small divide for positive values less than 256">; //===----------------------------------------------------------------------===// // X86 processors supported. @@ -170,7 +167,7 @@ def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>; def : AtomProc<"atom", [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B, FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP, - FeatureSlowDivide, FeaturePadShortFunctions]>; + FeatureSlowDivide]>; // "Arrandale" along with corei3 and corei5 def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem, FeatureFastUAMem, diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp deleted file mode 100644 index 05f8a62a75..0000000000 --- a/lib/Target/X86/X86PadShortFunction.cpp +++ /dev/null @@ -1,184 +0,0 @@ -//===-------- X86PadShortFunction.cpp - pad short functions -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the pass which will pad short functions to prevent -// a stall if a function returns before the return address is ready. This -// is needed for some Intel Atom processors. 
-// -//===----------------------------------------------------------------------===// - -#include -#include - -#define DEBUG_TYPE "x86-pad-short-functions" -#include "X86.h" -#include "X86InstrInfo.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -using namespace llvm; - -STATISTIC(NumBBsPadded, "Number of basic blocks padded"); - -namespace { - struct PadShortFunc : public MachineFunctionPass { - static char ID; - PadShortFunc() : MachineFunctionPass(ID) - , Threshold(4) - {} - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual const char *getPassName() const - { - return "X86 Atom pad short functions"; - } - - private: - bool addPadding(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned int NOOPsToAdd); - - void findReturn(MachineFunction &MF, - MachineBasicBlock &MBB, - unsigned int Cycles); - - bool cyclesUntilReturn(MachineFunction &MF, - MachineBasicBlock &MBB, - unsigned int &Cycles, - MachineBasicBlock::iterator *Location = 0); - - const unsigned int Threshold; - std::map ReturnBBs; - }; - - char PadShortFunc::ID = 0; -} - -FunctionPass *llvm::createX86PadShortFunctions() { - return new PadShortFunc(); -} - -/// runOnMachineFunction - Loop over all of the basic blocks, inserting -/// NOOP instructions before early exits. -bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) { - // Process all basic blocks. - ReturnBBs.clear(); - - // Search through basic blocks and mark the ones that have early returns - findReturn(MF, *MF.begin(), 0); - - int BBNum; - MachineBasicBlock::iterator ReturnLoc; - MachineBasicBlock *MBB; - - unsigned int Cycles = 0; - unsigned int BBCycles; - - // Pad the identified basic blocks with NOOPs - for (std::map::iterator I = ReturnBBs.begin(); - I != ReturnBBs.end(); ++I) { - BBNum = I->first; - Cycles = I->second; - - if (Cycles < Threshold) { - MBB = MF.getBlockNumbered(BBNum); - if (!cyclesUntilReturn(MF, *MBB, BBCycles, &ReturnLoc)) - continue; - - addPadding(MF, *MBB, ReturnLoc, Threshold - Cycles); - NumBBsPadded++; - } - } - - return false; -} - -/// findReturn - Starting at MBB, follow control flow and add all -/// basic blocks that contain a return to ReturnBBs. -void PadShortFunc::findReturn(MachineFunction &MF, - MachineBasicBlock &MBB, - unsigned int Cycles) -{ - // If this BB has a return, note how many cycles it takes to get there. - bool hasReturn = cyclesUntilReturn(MF, MBB, Cycles); - if (Cycles >= Threshold) - return; - - if (hasReturn) { - int BBNum = MBB.getNumber(); - ReturnBBs[BBNum] = std::max(ReturnBBs[BBNum], Cycles); - - return; - } - - // Follow branches in BB and look for returns - for (MachineBasicBlock::succ_iterator I = MBB.succ_begin(); - I != MBB.succ_end(); ++I) { - findReturn(MF, **I, Cycles); - } -} - -/// cyclesUntilReturn - if the MBB has a return instruction, set Location to -/// to the instruction and return true. Return false otherwise. -/// Cycles will be incremented by the number of cycles taken to reach the -/// return or the end of the BB, whichever occurs first. 
-bool PadShortFunc::cyclesUntilReturn(MachineFunction &MF, - MachineBasicBlock &MBB, - unsigned int &Cycles, - MachineBasicBlock::iterator *Location) -{ - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - const TargetMachine &Target = MF.getTarget(); - - for (MachineBasicBlock::iterator MBBI = MBB.begin(); MBBI != MBB.end(); - ++MBBI) { - MachineInstr *MI = MBBI; - // Mark basic blocks with a return instruction. Calls to other functions - // do not count because the called function will be padded, if necessary - if (MI->isReturn() && !MI->isCall()) { - if (Location) - *Location = MBBI; - return true; - } - - Cycles += TII.getInstrLatency(Target.getInstrItineraryData(), MI); - } - - return false; -} - -/// addPadding - Add the given number of NOOP instructions to the function -/// right before the return at MBBI -bool PadShortFunc::addPadding(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned int NOOPsToAdd) -{ - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - - DebugLoc DL = MBBI->getDebugLoc(); - - while (NOOPsToAdd-- > 0) { - // Since Atom has two instruction execution ports, - // the code emits two noops, which will be executed in parallell - // during one cycle. - BuildMI(MBB, MBBI, DL, TII.get(X86::NOOP)); - BuildMI(MBB, MBBI, DL, TII.get(X86::NOOP)); - } - - return true; -} - diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 53c28f4fce..d493b78752 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -350,7 +350,6 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, , UseLeaForSP(false) , HasSlowDivide(false) , PostRAScheduler(false) - , PadShortFunctions(false) , stackAlignment(4) // FIXME: this is a known good value for Yonah. How about others? , MaxInlineSizeThreshold(128) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 080f4cfeca..44f38a1a91 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -146,10 +146,6 @@ protected: /// PostRAScheduler - True if using post-register-allocation scheduler. bool PostRAScheduler; - /// PadShortFunctions - True if the short functions should be padded to prevent - /// a stall when returning too early. - bool PadShortFunctions; - /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -235,7 +231,6 @@ public: bool hasCmpxchg16b() const { return HasCmpxchg16b; } bool useLeaForSP() const { return UseLeaForSP; } bool hasSlowDivide() const { return HasSlowDivide; } - bool padShortFunctions() const { return PadShortFunctions; } bool isAtom() const { return X86ProcFamily == IntelAtom; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 8393f7e91e..ea99796f35 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -190,10 +190,6 @@ bool X86PassConfig::addPreEmitPass() { addPass(createX86IssueVZeroUpperPass()); ShouldPrint = true; } - if (getX86Subtarget().padShortFunctions()){ - addPass(createX86PadShortFunctions()); - ShouldPrint = true; - } return ShouldPrint; } -- cgit v1.2.3-18-g5258 From 835e7bc48e101d92c8bbabf0b52cdcfe35b99c01 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 5 Jan 2013 07:39:25 +0000 Subject: Recommit r171461 which was incorrectly reverted. 
Mark DIV/IDIV instructions hasSideEffects=1 because they can trap when dividing by 0. This is needed to keep early if conversion from moving them across basic blocks. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171608 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrArithmetic.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index d56763ea9d..3540fc3125 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -266,7 +266,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 // unsigned division/remainder -let hasSideEffects = 0 in { +let hasSideEffects = 1 in { // so that we don't speculatively execute let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "div{b}\t$src", [], IIC_DIV8_REG>; -- cgit v1.2.3-18-g5258 From 700843ec2c4cb4aa442377809c6ebb3ae3030c61 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Sun, 6 Jan 2013 19:00:15 +0000 Subject: Fix for PR14739. It's not safe to fold a load into a call across a store. Thanks to Nick Lewycky for the initial patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171665 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelDAGToDAG.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index c1eaa0e654..935f9bd715 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -420,6 +420,11 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { if (!Chain.getNumOperands()) return false; + // Since we are not checking for AA here, conservatively abort if the chain + // writes to memory. It's not safe to move the callee (a load) across a store. + if (isa(Chain.getNode()) && + cast(Chain.getNode())->writeMem()) + return false; if (Chain.getOperand(0).getNode() == Callee.getNode()) return true; if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor && -- cgit v1.2.3-18-g5258 From f564a9389da68266f44314fe38ab399fd2211134 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 6 Jan 2013 20:39:29 +0000 Subject: Fix suffix handling for parsing and printing of cvtsi2ss, cvtsi2sd, cvtss2si, cvttss2si, cvtsd2si, and cvttsd2si to match gas behavior. cvtsi2* should parse with an 'l' or 'q' suffix or no suffix at all. No suffix should be treated the same as 'l' suffix. Printing should always print a suffix. Previously we didn't parse or print an 'l' suffix. cvtt*2si/cvt*2si should parse with an 'l' or 'q' suffix or not suffix at all. No suffix should use the destination register size to choose encoding. Printing should not print a suffix. Original 'l' suffix issue with cvtsi2* pointed out by Michael Kuperstein. 
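For illustration (AT&T syntax; registers chosen arbitrarily):

  cvtsi2ssl %eax, %xmm0     # explicit 'l' suffix, 32-bit source
  cvtsi2ssq %rax, %xmm0     # explicit 'q' suffix, 64-bit source
  cvtsi2ss  (%rsp), %xmm0   # no suffix: parsed the same as cvtsi2ssl
  cvttss2si %xmm0, %eax     # no suffix: encoding chosen from 32-bit %eax
  cvttss2si %xmm0, %rax     # no suffix: encoding chosen from 64-bit %rax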
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171668 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 127 +++++++++++++++++++++++++++++++++--------- 1 file changed, 100 insertions(+), 27 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index ec7d20b831..95bb282b12 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1435,7 +1435,7 @@ defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG; defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, - "cvttss2si{q}\t{$src, $dst|$dst, $src}", + "cvttss2si\t{$src, $dst|$dst, $src}", SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG; defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, @@ -1443,26 +1443,43 @@ defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, SSE_CVT_SD2SI>, XD, VEX, VEX_LIG; defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, - "cvttsd2si{q}\t{$src, $dst|$dst, $src}", + "cvttsd2si\t{$src, $dst|$dst, $src}", SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG; +def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}", + (VCVTTSS2SIrr GR32:$dst, FR32:$src), 0>; +def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}", + (VCVTTSS2SIrm GR32:$dst, f32mem:$src), 0>; +def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}", + (VCVTTSD2SIrr GR32:$dst, FR64:$src), 0>; +def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}", + (VCVTTSD2SIrm GR32:$dst, f64mem:$src), 0>; +def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}", + (VCVTTSS2SI64rr GR64:$dst, FR32:$src), 0>; +def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}", + (VCVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>; +def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}", + (VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0>; +def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}", + (VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>; + // The assembler can recognize rr 64-bit instructions by seeing a rxx // register, but the same isn't true when only using memory operands, // provide other assembly "l" and "q" forms to address this explicitly // where appropriate to do so. 
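+// A bare "cvtsi2ss"/"cvtsi2sd" with a memory operand is also accepted; the
+// InstAliases added below encode it as the 'l' (32-bit source) form to
+// match gas.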
-defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, +defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss{l}">, XS, VEX_4V, VEX_LIG; defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS, VEX_4V, VEX_W, VEX_LIG; -defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, +defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD, VEX_4V, VEX_LIG; defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD, VEX_4V, VEX_W, VEX_LIG; -def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}", - (VCVTSI2SDrr FR64:$dst, FR64:$src1, GR32:$src)>; -def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}", +def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", + (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src)>; +def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>; let Predicates = [HasAVX] in { @@ -1489,27 +1506,49 @@ defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si\t{$src, $dst|$dst, $src}", SSE_CVT_SS2SI_32>, XS; defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, - "cvttss2si{q}\t{$src, $dst|$dst, $src}", + "cvttss2si\t{$src, $dst|$dst, $src}", SSE_CVT_SS2SI_64>, XS, REX_W; defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, "cvttsd2si\t{$src, $dst|$dst, $src}", SSE_CVT_SD2SI>, XD; defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, - "cvttsd2si{q}\t{$src, $dst|$dst, $src}", + "cvttsd2si\t{$src, $dst|$dst, $src}", SSE_CVT_SD2SI>, XD, REX_W; defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32, - "cvtsi2ss\t{$src, $dst|$dst, $src}", + "cvtsi2ss{l}\t{$src, $dst|$dst, $src}", SSE_CVT_Scalar>, XS; defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64, "cvtsi2ss{q}\t{$src, $dst|$dst, $src}", SSE_CVT_Scalar>, XS, REX_W; defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32, - "cvtsi2sd\t{$src, $dst|$dst, $src}", + "cvtsi2sd{l}\t{$src, $dst|$dst, $src}", SSE_CVT_Scalar>, XD; defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64, "cvtsi2sd{q}\t{$src, $dst|$dst, $src}", SSE_CVT_Scalar>, XD, REX_W; +def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}", + (CVTTSS2SIrr GR32:$dst, FR32:$src), 0>; +def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}", + (CVTTSS2SIrm GR32:$dst, f32mem:$src), 0>; +def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}", + (CVTTSD2SIrr GR32:$dst, FR64:$src), 0>; +def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}", + (CVTTSD2SIrm GR32:$dst, f64mem:$src), 0>; +def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}", + (CVTTSS2SI64rr GR64:$dst, FR32:$src), 0>; +def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}", + (CVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>; +def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", + (CVTTSD2SI64rr GR64:$dst, FR64:$src), 0>; +def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", + (CVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>; + +def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}", + (CVTSI2SSrm FR64:$dst, i32mem:$src)>; +def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}", + (CVTSI2SDrm FR64:$dst, i32mem:$src)>; + // Conversion Instructions Intrinsics - Match intrinsics which expect MM // and/or XMM operand(s). 
@@ -1544,27 +1583,27 @@ multiclass sse12_cvt_sint_3addr opc, RegisterClass SrcRC, } defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, - int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si{l}", + int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_LIG; defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, - int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si{q}", + int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG; defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, - sdmem, sse_load_f64, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD; + sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD; defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, - sdmem, sse_load_f64, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W; + sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, REX_W; defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", + int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}", SSE_CVT_Scalar, 0>, XS, VEX_4V; defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}", SSE_CVT_Scalar, 0>, XS, VEX_4V, VEX_W; defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", + int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}", SSE_CVT_Scalar, 0>, XD, VEX_4V; defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}", @@ -1574,13 +1613,13 @@ defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, let Constraints = "$src1 = $dst" in { defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse_cvtsi2ss, i32mem, loadi32, - "cvtsi2ss", SSE_CVT_Scalar>, XS; + "cvtsi2ss{l}", SSE_CVT_Scalar>, XS; defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}", SSE_CVT_Scalar>, XS, REX_W; defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse2_cvtsi2sd, i32mem, loadi32, - "cvtsi2sd", SSE_CVT_Scalar>, XD; + "cvtsi2sd{l}", SSE_CVT_Scalar>, XD; defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W; @@ -1594,40 +1633,40 @@ defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, SSE_CVT_SS2SI_32>, XS, VEX; defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse_cvttss2si64, ssmem, sse_load_f32, - "cvttss2si{q}", SSE_CVT_SS2SI_64>, + "cvttss2si", SSE_CVT_SS2SI_64>, XS, VEX, VEX_W; defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, sdmem, sse_load_f64, "cvttsd2si", SSE_CVT_SD2SI>, XD, VEX; defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, - "cvttsd2si{q}", SSE_CVT_SD2SI>, + "cvttsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_W; defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, ssmem, sse_load_f32, "cvttss2si", SSE_CVT_SS2SI_32>, XS; defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse_cvttss2si64, ssmem, sse_load_f32, - "cvttss2si{q}", SSE_CVT_SS2SI_64>, XS, REX_W; + "cvttss2si", SSE_CVT_SS2SI_64>, XS, REX_W; defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, sdmem, sse_load_f64, "cvttsd2si", SSE_CVT_SD2SI>, XD; defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse2_cvttsd2si64, sdmem, 
                                        sse_load_f64,
-                                       "cvttsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W;
+                                       "cvttsd2si", SSE_CVT_SD2SI>, XD, REX_W;
 
 defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
-                                ssmem, sse_load_f32, "cvtss2si{l}",
+                                ssmem, sse_load_f32, "cvtss2si",
                                 SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
 defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
-                                  ssmem, sse_load_f32, "cvtss2si{q}",
+                                  ssmem, sse_load_f32, "cvtss2si",
                                   SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
 
 defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
-                               ssmem, sse_load_f32, "cvtss2si{l}",
+                               ssmem, sse_load_f32, "cvtss2si",
                                SSE_CVT_SS2SI_32>, XS;
 defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
-                                 ssmem, sse_load_f32, "cvtss2si{q}",
+                                 ssmem, sse_load_f32, "cvtss2si",
                                  SSE_CVT_SS2SI_64>, XS, REX_W;
 
 defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
@@ -1644,6 +1683,40 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
                             SSEPackedSingle, SSE_CVT_PS>,
                             TB, Requires<[UseSSE2]>;
 
+def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
+                (VCVTSS2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
+                (VCVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
+def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
+                (VCVTSD2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
+                (VCVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
+def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
+                (VCVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
+                (VCVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
+def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
+                (VCVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
+                (VCVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;
+
+def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
+                (CVTSS2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
+                (CVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
+def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
+                (CVTSD2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
+                (CVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
+def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
+                (CVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
+                (CVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
+def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
+                (CVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
+                (CVTSD2SI64rm GR64:$dst, sdmem:$src)>;
+
 /// SSE 2 Only
 
 // Convert scalar double to scalar single
-- 
cgit v1.2.3-18-g5258


From aeef83c6afa1e18d1cf9d359cc678ca0ad556175 Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Mon, 7 Jan 2013 01:37:14 +0000
Subject: Switch TargetTransformInfo from an immutable analysis pass that
 requires a TargetMachine to construct (and thus isn't always available), to
 an analysis group that supports layered implementations much like
 AliasAnalysis does. This is a pretty massive change, with a few parts that
 I was unable to easily separate (sorry), so I'll walk through it.

The first step of this conversion was to make TargetTransformInfo an
analysis group, and to sink the nonce implementations in
ScalarTargetTransformInfo and VectorTargetTransformInfo into a
NoTargetTransformInfo pass. This allows other passes to add a hard
requirement on TTI, and assume they will always get at least one
implementation.

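(A minimal sketch of what that looks like for a client pass, assuming the
usual legacy PassManager boilerplate:

  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<TargetTransformInfo>();
  }
  ...
  const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();

In the worst case this resolves to the NoTargetTransformInfo pass's default
answers rather than being unavailable.)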
The TargetTransformInfo analysis group leverages the delegation chaining
trick that AliasAnalysis uses, where the base class for the analysis group
delegates to the previous analysis *pass*, allowing all but the NoFoo
analysis passes to implement only the parts of the interfaces they support.
It also introduces a new trick where each pass in the group retains a
pointer to the top-most pass that has been initialized. This allows passes
to implement one API in terms of another API and benefit when some other
pass above them in the stack has more precise results for the second API.

The second step of this conversion is to create a pass that implements the
TargetTransformInfo analysis using the target-independent abstractions in
the code generator. This replaces the ScalarTargetTransformImpl and
VectorTargetTransformImpl classes in lib/Target with a single pass in
lib/CodeGen called BasicTargetTransformInfo. This class actually provides
most of the TTI functionality, basing it upon the TargetLowering
abstraction and other information in the target independent code generator.

The third step of the conversion adds support to all TargetMachines to
register custom analysis passes. This allows building those passes with
access to TargetLowering or other target-specific classes, and it also
allows each target to customize the set of analysis passes desired in the
pass manager. The baseline LLVMTargetMachine implements this interface to
add the BasicTTI pass to the pass manager, and all of the tools that want
to support target-aware TTI passes call this routine on whatever target
machine they end up with to add the appropriate passes.

The fourth step of the conversion created target-specific TTI analysis
passes for the X86 and ARM backends. These passes contain the custom logic
that was previously in their extensions of the ScalarTargetTransformInfo
and VectorTargetTransformInfo interfaces. I separated them into their own
file, as now all of the interface bits are private and they just expose a
function to create the pass itself. Then I extended these target machines
to set up a custom set of analysis passes, first adding BasicTTI as a
fallback, and then adding their customized TTI implementations.

The fifth step required logic that was shared between the target
independent layer and the specific targets to move to a different
interface, as they no longer derive from each other. As a consequence,
helper functions were added to TargetLowering representing the common logic
needed both in the target implementation and the codegen implementation of
the TTI pass. While technically this is the only change that could have
been committed separately, it would have been a nightmare to extract.

The final step of the conversion was just to delete all the old
boilerplate. This got rid of the ScalarTargetTransformInfo and
VectorTargetTransformInfo classes, all of the support in all of the targets
for producing instances of them, and all of the support in the tools for
manually constructing a pass based around them.

Now that TTI is a relatively normal analysis group, two things become
straightforward. First, we can sink it into lib/Analysis, which is a more
natural layer for it to live in. Second, clients of this interface can
depend on it *always* being available, which will simplify their code and
behavior. These (and other) simplifications will follow in subsequent
commits; this one is clearly big enough.

Finally, I'm very aware that many of the comments and much of the
documentation need to be updated.
As soon as I had this working, and plausibly well commented, I wanted to get it committed and in front of the build bots. I'll be doing a few passes over documentation later if it sticks. Commits to update DragonEgg and Clang will be made presently. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171681 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/CMakeLists.txt | 1 + lib/Target/X86/X86.h | 3 + lib/Target/X86/X86ISelLowering.cpp | 262 ---------------------- lib/Target/X86/X86ISelLowering.h | 35 --- lib/Target/X86/X86TargetMachine.cpp | 19 +- lib/Target/X86/X86TargetMachine.h | 20 +- lib/Target/X86/X86TargetTransformInfo.cpp | 355 ++++++++++++++++++++++++++++++ 7 files changed, 377 insertions(+), 318 deletions(-) create mode 100644 lib/Target/X86/X86TargetTransformInfo.cpp (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 19912cc6bc..95f1f22e04 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -30,6 +30,7 @@ set(sources X86Subtarget.cpp X86TargetMachine.cpp X86TargetObjectFile.cpp + X86TargetTransformInfo.cpp X86VZeroUpper.cpp ) diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 1e7b98d94f..2bff7ab15a 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -63,6 +63,9 @@ FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, /// FunctionPass *createEmitX86CodeToMemory(); +/// \brief Creates an X86-specific Target Transformation Info pass. +ImmutablePass *createX86TargetTransformInfoPass(const X86TargetMachine *TM); + } // End llvm namespace #endif diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f482ac9846..4b00b46e73 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -18063,265 +18063,3 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return Res; } - -//===----------------------------------------------------------------------===// -// -// X86 cost model. -// -//===----------------------------------------------------------------------===// - -struct X86CostTblEntry { - int ISD; - MVT Type; - unsigned Cost; -}; - -static int -FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) { - for (unsigned int i = 0; i < len; ++i) - if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty) - return i; - - // Could not find an entry. - return -1; -} - -struct X86TypeConversionCostTblEntry { - int ISD; - MVT Dst; - MVT Src; - unsigned Cost; -}; - -static int -FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len, - int ISD, MVT Dst, MVT Src) { - for (unsigned int i = 0; i < len; ++i) - if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst) - return i; - - // Could not find an entry. - return -1; -} - -ScalarTargetTransformInfo::PopcntHwSupport -X86ScalarTargetTransformImpl::getPopcntHwSupport(unsigned TyWidth) const { - assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); - const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget(); - - // TODO: Currently the __builtin_popcount() implementation using SSE3 - // instructions is inefficient. Once the problem is fixed, we should - // call ST.hasSSE3() instead of ST.hasSSE4(). - return ST.hasSSE41() ? 
Fast : None; -} - -unsigned X86VectorTargetTransformInfo::getNumberOfRegisters(bool Vector) const { - const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget(); - if (ST.is64Bit()) - return 16; - return 8; -} - -unsigned -X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode, - Type *Ty) const { - // Legalize the type. - std::pair LT = getTypeLegalizationCost(Ty); - - int ISD = InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget(); - - static const X86CostTblEntry AVX1CostTable[] = { - // We don't have to scalarize unsupported ops. We can issue two half-sized - // operations and we only need to extract the upper YMM half. - // Two ops + 1 extract + 1 insert = 4. - { ISD::MUL, MVT::v8i32, 4 }, - { ISD::SUB, MVT::v8i32, 4 }, - { ISD::ADD, MVT::v8i32, 4 }, - { ISD::MUL, MVT::v4i64, 4 }, - { ISD::SUB, MVT::v4i64, 4 }, - { ISD::ADD, MVT::v4i64, 4 }, - }; - - // Look for AVX1 lowering tricks. - if (ST.hasAVX()) { - int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD, - LT.second); - if (Idx != -1) - return LT.first * AVX1CostTable[Idx].Cost; - } - // Fallback to the default implementation. - return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty); -} - -unsigned -X86VectorTargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const { - // Legalize the type. - std::pair LT = getTypeLegalizationCost(Src); - assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && - "Invalid Opcode"); - - const X86Subtarget &ST = - TLI->getTargetMachine().getSubtarget(); - - // Each load/store unit costs 1. - unsigned Cost = LT.first * 1; - - // On Sandybridge 256bit load/stores are double pumped - // (but not on Haswell). - if (LT.second.getSizeInBits() > 128 && !ST.hasAVX2()) - Cost*=2; - - return Cost; -} - -unsigned -X86VectorTargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const { - assert(Val->isVectorTy() && "This must be a vector type"); - - if (Index != -1U) { - // Legalize the type. - std::pair LT = getTypeLegalizationCost(Val); - - // This type is legalized to a scalar type. - if (!LT.second.isVector()) - return 0; - - // The type may be split. Normalize the index to the new type. - unsigned Width = LT.second.getVectorNumElements(); - Index = Index % Width; - - // Floating point scalars are already located in index #0. - if (Val->getScalarType()->isFloatingPointTy() && Index == 0) - return 0; - } - - return VectorTargetTransformImpl::getVectorInstrCost(Opcode, Val, Index); -} - -unsigned X86VectorTargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, - Type *ValTy, - Type *CondTy) const { - // Legalize the type. - std::pair LT = getTypeLegalizationCost(ValTy); - - MVT MTy = LT.second; - - int ISD = InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - const X86Subtarget &ST = - TLI->getTargetMachine().getSubtarget(); - - static const X86CostTblEntry SSE42CostTbl[] = { - { ISD::SETCC, MVT::v2f64, 1 }, - { ISD::SETCC, MVT::v4f32, 1 }, - { ISD::SETCC, MVT::v2i64, 1 }, - { ISD::SETCC, MVT::v4i32, 1 }, - { ISD::SETCC, MVT::v8i16, 1 }, - { ISD::SETCC, MVT::v16i8, 1 }, - }; - - static const X86CostTblEntry AVX1CostTbl[] = { - { ISD::SETCC, MVT::v4f64, 1 }, - { ISD::SETCC, MVT::v8f32, 1 }, - // AVX1 does not support 8-wide integer compare. 
- { ISD::SETCC, MVT::v4i64, 4 }, - { ISD::SETCC, MVT::v8i32, 4 }, - { ISD::SETCC, MVT::v16i16, 4 }, - { ISD::SETCC, MVT::v32i8, 4 }, - }; - - static const X86CostTblEntry AVX2CostTbl[] = { - { ISD::SETCC, MVT::v4i64, 1 }, - { ISD::SETCC, MVT::v8i32, 1 }, - { ISD::SETCC, MVT::v16i16, 1 }, - { ISD::SETCC, MVT::v32i8, 1 }, - }; - - if (ST.hasAVX2()) { - int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy); - if (Idx != -1) - return LT.first * AVX2CostTbl[Idx].Cost; - } - - if (ST.hasAVX()) { - int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy); - if (Idx != -1) - return LT.first * AVX1CostTbl[Idx].Cost; - } - - if (ST.hasSSE42()) { - int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy); - if (Idx != -1) - return LT.first * SSE42CostTbl[Idx].Cost; - } - - return VectorTargetTransformImpl::getCmpSelInstrCost(Opcode, ValTy, CondTy); -} - -unsigned X86VectorTargetTransformInfo::getCastInstrCost(unsigned Opcode, - Type *Dst, - Type *Src) const { - int ISD = InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - EVT SrcTy = TLI->getValueType(Src); - EVT DstTy = TLI->getValueType(Dst); - - if (!SrcTy.isSimple() || !DstTy.isSimple()) - return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src); - - const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget(); - - static const X86TypeConversionCostTblEntry AVXConversionTbl[] = { - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, - { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, - { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, - { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 }, - { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, - { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 }, - { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, - { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, - }; - - if (ST.hasAVX()) { - int Idx = FindInConvertTable(AVXConversionTbl, - array_lengthof(AVXConversionTbl), - ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); - if (Idx != -1) - return AVXConversionTbl[Idx].Cost; - } - - return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src); -} - - -unsigned X86VectorTargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Tp, - int Index, - Type *SubTp) const { - // We only estimate the cost of reverse shuffles. - if (Kind != Reverse) - return VectorTargetTransformImpl::getShuffleCost(Kind, Tp, Index, SubTp); - - std::pair LT = getTypeLegalizationCost(Tp); - unsigned Cost = 1; - if (LT.second.getSizeInBits() > 128) - Cost = 3; // Extract + insert + copy. - - // Multiple by the number of parts. 
- return Cost * LT.first; -} - diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 86b7764c13..16ce364cd5 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -23,7 +23,6 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { namespace X86ISD { @@ -945,40 +944,6 @@ namespace llvm { FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo); } - - class X86ScalarTargetTransformImpl : public ScalarTargetTransformImpl { - public: - explicit X86ScalarTargetTransformImpl(const TargetLowering *TL) : - ScalarTargetTransformImpl(TL) {}; - - virtual PopcntHwSupport getPopcntHwSupport(unsigned TyWidth) const; - }; - - class X86VectorTargetTransformInfo : public VectorTargetTransformImpl { - public: - explicit X86VectorTargetTransformInfo(const TargetLowering *TL) : - VectorTargetTransformImpl(TL) {} - - virtual unsigned getNumberOfRegisters(bool Vector) const; - - virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; - - virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const; - - virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const; - - virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const; - - virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const; - - unsigned getShuffleCost(ShuffleKind Kind, - Type *Tp, int Index, Type *SubTp) const; - }; } #endif // X86ISELLOWERING_H diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index ea99796f35..847e06b960 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -48,8 +48,7 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, InstrInfo(*this), TLInfo(*this), TSInfo(*this), - JITInfo(*this), - STTI(&TLInfo), VTTI(&TLInfo) { + JITInfo(*this) { } void X86_64TargetMachine::anchor() { } @@ -65,8 +64,7 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, InstrInfo(*this), TLInfo(*this), TSInfo(*this), - JITInfo(*this), - STTI(&TLInfo), VTTI(&TLInfo){ + JITInfo(*this) { } /// X86TargetMachine ctor - Create an X86 target. @@ -120,6 +118,19 @@ static cl::opt X86EarlyIfConv("x86-early-ifcvt", cl::desc("Enable early if-conversion on X86")); +//===----------------------------------------------------------------------===// +// X86 Analysis Pass Setup +//===----------------------------------------------------------------------===// + +void X86TargetMachine::addAnalysisPasses(PassManagerBase &PM) { + // Add first the target-independent BasicTTI pass, then our X86 pass. This + // allows the X86 pass to delegate to the target independent layer when + // appropriate. 
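+  // The pass registered last is initialized last and ends up on top of the
+  // TTI stack, so the X86 pass is queried first and can delegate down.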
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); + PM.add(createX86TargetTransformInfoPass(this)); +} + + //===----------------------------------------------------------------------===// // Pass Pipeline Configuration //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index a0749f0892..174d391831 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -24,7 +24,6 @@ #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -65,6 +64,9 @@ public: return &InstrItins; } + /// \brief Register X86 analysis passes with a pass manager. + virtual void addAnalysisPasses(PassManagerBase &PM); + // Set up the pass pipeline. virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); @@ -81,8 +83,6 @@ class X86_32TargetMachine : public X86TargetMachine { X86TargetLowering TLInfo; X86SelectionDAGInfo TSInfo; X86JITInfo JITInfo; - ScalarTargetTransformImpl STTI; - X86VectorTargetTransformInfo VTTI; public: X86_32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -101,12 +101,6 @@ public: virtual X86JITInfo *getJITInfo() { return &JITInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } }; /// X86_64TargetMachine - X86 64-bit target machine. @@ -118,8 +112,6 @@ class X86_64TargetMachine : public X86TargetMachine { X86TargetLowering TLInfo; X86SelectionDAGInfo TSInfo; X86JITInfo JITInfo; - X86ScalarTargetTransformImpl STTI; - X86VectorTargetTransformInfo VTTI; public: X86_64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -138,12 +130,6 @@ public: virtual X86JITInfo *getJITInfo() { return &JITInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } }; } // End llvm namespace diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp new file mode 100644 index 0000000000..f5aa57740f --- /dev/null +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -0,0 +1,355 @@ +//===-- X86TargetTransformInfo.cpp - X86 specific TTI pass ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to the +/// X86 target machine. It uses the target's detailed information to provide +/// more precise answers to certain TTI queries, while letting the target +/// independent and default TTI implementations handle the rest. 
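+/// (The class itself lives in an anonymous namespace; only the
+/// createX86TargetTransformInfoPass factory is exported.)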
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86tti"
+#include "X86.h"
+#include "X86TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/TargetTransformInfo.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializeX86TTIPass(PassRegistry &);
+}
+
+namespace {
+
+class X86TTI : public ImmutablePass, public TargetTransformInfo {
+  const X86TargetMachine *TM;
+  const X86Subtarget *ST;
+  const X86TargetLowering *TLI;
+
+  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+  /// are set if the result needs to be inserted and/or extracted from vectors.
+  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+  X86TTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+    llvm_unreachable("This pass cannot be directly constructed");
+  }
+
+  X86TTI(const X86TargetMachine *TM)
+      : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+        TLI(TM->getTargetLowering()) {
+    initializeX86TTIPass(*PassRegistry::getPassRegistry());
+  }
+
+  virtual void initializePass() {
+    pushTTIStack(this);
+  }
+
+  virtual void finalizePass() {
+    popTTIStack();
+  }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    TargetTransformInfo::getAnalysisUsage(AU);
+  }
+
+  /// Pass identification.
+  static char ID;
+
+  /// Provide necessary pointer adjustments for the two base classes.
+  virtual void *getAdjustedAnalysisPointer(const void *ID) {
+    if (ID == &TargetTransformInfo::ID)
+      return (TargetTransformInfo*)this;
+    return this;
+  }
+
+  /// \name Scalar TTI Implementations
+  /// @{
+
+  virtual PopcntHwSupport getPopcntHwSupport(unsigned TyWidth) const;
+
+  /// @}
+
+  /// \name Vector TTI Implementations
+  /// @{
+
+  virtual unsigned getNumberOfRegisters(bool Vector) const;
+  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
+  virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+                                  int Index, Type *SubTp) const;
+  virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+                                    Type *Src) const;
+  virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+                                      Type *CondTy) const;
+  virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+                                      unsigned Index) const;
+  virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+                                   unsigned Alignment,
+                                   unsigned AddressSpace) const;
+
+  /// @}
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(X86TTI, TargetTransformInfo, "x86tti",
+                   "X86 Target Transform Info", true, true, false)
+char X86TTI::ID = 0;
+
+ImmutablePass *
+llvm::createX86TargetTransformInfoPass(const X86TargetMachine *TM) {
+  return new X86TTI(TM);
+}
+
+
+//===----------------------------------------------------------------------===//
+//
+// X86 cost model.
+//
+//===----------------------------------------------------------------------===//
+
+namespace {
+struct X86CostTblEntry {
+  int ISD;
+  MVT Type;
+  unsigned Cost;
+};
+}
+
+static int
+FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) {
+  for (unsigned int i = 0; i < len; ++i)
+    if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty)
+      return i;
+
+  // Could not find an entry.
+  return -1;
+}
+
+namespace {
+struct X86TypeConversionCostTblEntry {
+  int ISD;
+  MVT Dst;
+  MVT Src;
+  unsigned Cost;
+};
+}
+
+static int
+FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len,
+                   int ISD, MVT Dst, MVT Src) {
+  for (unsigned int i = 0; i < len; ++i)
+    if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst)
+      return i;
+
+  // Could not find an entry.
+  return -1;
+}
+
+
+X86TTI::PopcntHwSupport X86TTI::getPopcntHwSupport(unsigned TyWidth) const {
+  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+  // TODO: Currently the __builtin_popcount() implementation using SSE3
+  //   instructions is inefficient. Once the problem is fixed, we should
+  //   call ST->hasSSE3() instead of ST->hasSSE4().
+  return ST->hasSSE41() ? Fast : None;
+}
+
+unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
+  if (ST->is64Bit())
+    return 16;
+  return 8;
+}
+
+unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
+  // Legalize the type.
+  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  static const X86CostTblEntry AVX1CostTable[] = {
+    // We don't have to scalarize unsupported ops. We can issue two half-sized
+    // operations and we only need to extract the upper YMM half.
+    // Two ops + 1 extract + 1 insert = 4.
+    { ISD::MUL, MVT::v8i32, 4 },
+    { ISD::SUB, MVT::v8i32, 4 },
+    { ISD::ADD, MVT::v8i32, 4 },
+    { ISD::MUL, MVT::v4i64, 4 },
+    { ISD::SUB, MVT::v4i64, 4 },
+    { ISD::ADD, MVT::v4i64, 4 },
+  };
+
+  // Look for AVX1 lowering tricks.
+  if (ST->hasAVX()) {
+    int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD,
+                          LT.second);
+    if (Idx != -1)
+      return LT.first * AVX1CostTable[Idx].Cost;
+  }
+  // Fallback to the default implementation.
+  return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty);
+}
+
+unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+                                Type *SubTp) const {
+  // We only estimate the cost of reverse shuffles.
+  if (Kind != Reverse)
+    return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+
+  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+  unsigned Cost = 1;
+  if (LT.second.getSizeInBits() > 128)
+    Cost = 3; // Extract + insert + copy.
+
+  // Multiply by the number of parts.
+ return Cost * LT.first; +} + +unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + EVT SrcTy = TLI->getValueType(Src); + EVT DstTy = TLI->getValueType(Dst); + + if (!SrcTy.isSimple() || !DstTy.isSimple()) + return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); + + static const X86TypeConversionCostTblEntry AVXConversionTbl[] = { + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, + { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, + { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 }, + { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, + { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, + }; + + if (ST->hasAVX()) { + int Idx = FindInConvertTable(AVXConversionTbl, + array_lengthof(AVXConversionTbl), + ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); + if (Idx != -1) + return AVXConversionTbl[Idx].Cost; + } + + return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); +} + +unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const { + // Legalize the type. + std::pair LT = TLI->getTypeLegalizationCost(ValTy); + + MVT MTy = LT.second; + + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + static const X86CostTblEntry SSE42CostTbl[] = { + { ISD::SETCC, MVT::v2f64, 1 }, + { ISD::SETCC, MVT::v4f32, 1 }, + { ISD::SETCC, MVT::v2i64, 1 }, + { ISD::SETCC, MVT::v4i32, 1 }, + { ISD::SETCC, MVT::v8i16, 1 }, + { ISD::SETCC, MVT::v16i8, 1 }, + }; + + static const X86CostTblEntry AVX1CostTbl[] = { + { ISD::SETCC, MVT::v4f64, 1 }, + { ISD::SETCC, MVT::v8f32, 1 }, + // AVX1 does not support 8-wide integer compare. + { ISD::SETCC, MVT::v4i64, 4 }, + { ISD::SETCC, MVT::v8i32, 4 }, + { ISD::SETCC, MVT::v16i16, 4 }, + { ISD::SETCC, MVT::v32i8, 4 }, + }; + + static const X86CostTblEntry AVX2CostTbl[] = { + { ISD::SETCC, MVT::v4i64, 1 }, + { ISD::SETCC, MVT::v8i32, 1 }, + { ISD::SETCC, MVT::v16i16, 1 }, + { ISD::SETCC, MVT::v32i8, 1 }, + }; + + if (ST->hasAVX2()) { + int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy); + if (Idx != -1) + return LT.first * AVX2CostTbl[Idx].Cost; + } + + if (ST->hasAVX()) { + int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy); + if (Idx != -1) + return LT.first * AVX1CostTbl[Idx].Cost; + } + + if (ST->hasSSE42()) { + int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy); + if (Idx != -1) + return LT.first * SSE42CostTbl[Idx].Cost; + } + + return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); +} + +unsigned X86TTI::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { + assert(Val->isVectorTy() && "This must be a vector type"); + + if (Index != -1U) { + // Legalize the type. + std::pair LT = TLI->getTypeLegalizationCost(Val); + + // This type is legalized to a scalar type. + if (!LT.second.isVector()) + return 0; + + // The type may be split. Normalize the index to the new type. 
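+    //      (For example, a v16i32 vector may legalize to four v4i32 parts;
+    //       an extract at index 13 then becomes index 13 % 4 == 1 within
+    //       one of those parts.)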
+    unsigned Width = LT.second.getVectorNumElements();
+    Index = Index % Width;
+
+    // Floating point scalars are already located in index #0.
+    if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
+      return 0;
+  }
+
+  return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+}
+
+unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+                                 unsigned AddressSpace) const {
+  // Legalize the type.
+  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+         "Invalid Opcode");
+
+  // Each load/store unit costs 1.
+  unsigned Cost = LT.first * 1;
+
+  // On Sandybridge 256-bit load/stores are double pumped
+  // (but not on Haswell).
+  if (LT.second.getSizeInBits() > 128 && !ST->hasAVX2())
+    Cost*=2;
+
+  return Cost;
+}
-- 
cgit v1.2.3-18-g5258


From be04929f7fd76a921540e9901f24563e51dc1219 Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Mon, 7 Jan 2013 03:08:10 +0000
Subject: Move TargetTransformInfo to live under the Analysis library. This no
 longer violates any dependency layering, and it is in fact an analysis. =]

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171686 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86TargetTransformInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index f5aa57740f..c98681b629 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -17,9 +17,9 @@
 #define DEBUG_TYPE "x86tti"
 #include "X86.h"
 #include "X86TargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetLowering.h"
-#include "llvm/TargetTransformInfo.h"
 using namespace llvm;
 
 // Declare the pass initialization routine locally as target-specific passes
-- 
cgit v1.2.3-18-g5258


From d1b8ef97c47d347f2a2261a0d6de4872f248321f Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Mon, 7 Jan 2013 03:16:03 +0000
Subject: Make the popcnt support enums and methods have clearer names and
 follow the coding conventions regarding enumerating a set of "kinds" of
 things.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171687 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86TargetTransformInfo.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index c98681b629..8a699afa6a 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -76,7 +76,7 @@ public:
   /// \name Scalar TTI Implementations
   /// @{
 
-  virtual PopcntHwSupport getPopcntHwSupport(unsigned TyWidth) const;
+  virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
 
   /// @}
 
@@ -157,12 +157,12 @@ FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len,
 }
 
 
-X86TTI::PopcntHwSupport X86TTI::getPopcntHwSupport(unsigned TyWidth) const {
+X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
   // TODO: Currently the __builtin_popcount() implementation using SSE3
   //   instructions is inefficient. Once the problem is fixed, we should
   //   call ST->hasSSE3() instead of ST->hasSSE4().
-  return ST->hasSSE41() ? Fast : None;
+  return ST->hasSSE41() ? PSK_FastHardware : PSK_Software;
 }
 
 unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
-- 
cgit v1.2.3-18-g5258


From bb00800ff46e7a2a628d0a6741a7f0422c74c198 Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Mon, 7 Jan 2013 03:20:02 +0000
Subject: Fix the enumerator names for ShuffleKind to match the coding
 standards, and make its comments doxygen comments.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171688 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86TargetTransformInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 8a699afa6a..9cc1b180e9 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -204,7 +204,7 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
 unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                                 Type *SubTp) const {
   // We only estimate the cost of reverse shuffles.
-  if (Kind != Reverse)
+  if (Kind != SK_Reverse)
     return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
 
   std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
-- 
cgit v1.2.3-18-g5258


From df3bf55d49883a076c8599c3b57f0707b8dc0335 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 7 Jan 2013 05:04:39 +0000
Subject: Remove unnecessary # tokens at the beginning and end of defm names.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171694 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrSSE.td | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)
(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 95bb282b12..3175324b4c 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2732,17 +2732,17 @@ multiclass PDI_binop_all opc, string OpcodeStr, SDNode Opcode,
                          ValueType OpVT128, ValueType OpVT256,
                          OpndItins itins, bit IsCommutable = 0> {
 let Predicates = [HasAVX] in
-  defm V#NAME# : PDI_binop_rm, VEX_4V;
 
 let Constraints = "$src1 = $dst" in
-  defm #NAME# : PDI_binop_rm;
+  defm NAME : PDI_binop_rm;
 
 let Predicates = [HasAVX2] in
   defm V#NAME#Y : PDI_binop_rm, VEX_4V, VEX_L;
+                 OpVT256, VR256, memopv4i64, i256mem, itins,
+                 IsCommutable, 0>, VEX_4V, VEX_L;
 }
 
 // These are ordered here for pattern ordering requirements with the fp versions
 
@@ -3635,13 +3635,13 @@ multiclass PDI_binop_all_int opc, string OpcodeStr,
                              Intrinsic IntId128, Intrinsic IntId256,
                              OpndItins itins, bit IsCommutable = 0> {
 let Predicates = [HasAVX] in
-  defm V#NAME# : PDI_binop_rm_int, VEX_4V;
+  defm V#NAME : PDI_binop_rm_int, VEX_4V;
 
 let Constraints = "$src1 = $dst" in
-  defm #NAME# : PDI_binop_rm_int;
+  defm NAME : PDI_binop_rm_int;
 
 let Predicates = [HasAVX2] in
   defm V#NAME#Y : PDI_binop_rm_int
Date: Mon, 7 Jan 2013 05:26:58 +0000
Subject: Remove # from the beginning and end of def names.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171696 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrArithmetic.td | 246 ++++++++++++++++----------------- lib/Target/X86/X86InstrCMovSetCC.td | 12 +- lib/Target/X86/X86InstrCompiler.td | 256 +++++++++++++++++------------------ lib/Target/X86/X86InstrInfo.td | 52 +++---- 4 files changed, 283 insertions(+), 283 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 3540fc3125..0eecd5ffc1 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -933,60 +933,60 @@ multiclass ArithBinOp_RF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, let Constraints = "$src1 = $dst" in { let isCommutable = CommutableRR, isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def #NAME#8rr : BinOpRR_RF; - def #NAME#16rr : BinOpRR_RF; - def #NAME#32rr : BinOpRR_RF; - def #NAME#64rr : BinOpRR_RF; + def NAME#8rr : BinOpRR_RF; + def NAME#16rr : BinOpRR_RF; + def NAME#32rr : BinOpRR_RF; + def NAME#64rr : BinOpRR_RF; } // isCommutable - def #NAME#8rr_REV : BinOpRR_Rev; - def #NAME#16rr_REV : BinOpRR_Rev; - def #NAME#32rr_REV : BinOpRR_Rev; - def #NAME#64rr_REV : BinOpRR_Rev; + def NAME#8rr_REV : BinOpRR_Rev; + def NAME#16rr_REV : BinOpRR_Rev; + def NAME#32rr_REV : BinOpRR_Rev; + def NAME#64rr_REV : BinOpRR_Rev; - def #NAME#8rm : BinOpRM_RF; - def #NAME#16rm : BinOpRM_RF; - def #NAME#32rm : BinOpRM_RF; - def #NAME#64rm : BinOpRM_RF; + def NAME#8rm : BinOpRM_RF; + def NAME#16rm : BinOpRM_RF; + def NAME#32rm : BinOpRM_RF; + def NAME#64rm : BinOpRM_RF; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { // NOTE: These are order specific, we want the ri8 forms to be listed // first so that they are slightly preferred to the ri forms. - def #NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>; - def #NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, opnodeflag, RegMRM>; - def #NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, opnodeflag, RegMRM>; - - def #NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>; - def #NAME#16ri : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>; - def #NAME#32ri : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>; - def #NAME#64ri32: BinOpRI_RF<0x80, mnemonic, Xi64, opnodeflag, RegMRM>; + def NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>; + def NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, opnodeflag, RegMRM>; + def NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, opnodeflag, RegMRM>; + + def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>; + def NAME#16ri : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>; + def NAME#32ri : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>; + def NAME#64ri32: BinOpRI_RF<0x80, mnemonic, Xi64, opnodeflag, RegMRM>; } } // Constraints = "$src1 = $dst" - def #NAME#8mr : BinOpMR_RMW; - def #NAME#16mr : BinOpMR_RMW; - def #NAME#32mr : BinOpMR_RMW; - def #NAME#64mr : BinOpMR_RMW; + def NAME#8mr : BinOpMR_RMW; + def NAME#16mr : BinOpMR_RMW; + def NAME#32mr : BinOpMR_RMW; + def NAME#64mr : BinOpMR_RMW; // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. 
- def #NAME#16mi8 : BinOpMI8_RMW; - def #NAME#32mi8 : BinOpMI8_RMW; - def #NAME#64mi8 : BinOpMI8_RMW; - - def #NAME#8mi : BinOpMI_RMW; - def #NAME#16mi : BinOpMI_RMW; - def #NAME#32mi : BinOpMI_RMW; - def #NAME#64mi32 : BinOpMI_RMW; - - def #NAME#8i8 : BinOpAI; - def #NAME#16i16 : BinOpAI; - def #NAME#32i32 : BinOpAI; - def #NAME#64i32 : BinOpAI; + def NAME#16mi8 : BinOpMI8_RMW; + def NAME#32mi8 : BinOpMI8_RMW; + def NAME#64mi8 : BinOpMI8_RMW; + + def NAME#8mi : BinOpMI_RMW; + def NAME#16mi : BinOpMI_RMW; + def NAME#32mi : BinOpMI_RMW; + def NAME#64mi32 : BinOpMI_RMW; + + def NAME#8i8 : BinOpAI; + def NAME#16i16 : BinOpAI; + def NAME#32i32 : BinOpAI; + def NAME#64i32 : BinOpAI; } } @@ -1004,60 +1004,60 @@ multiclass ArithBinOp_RFF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, let Constraints = "$src1 = $dst" in { let isCommutable = CommutableRR, isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def #NAME#8rr : BinOpRR_RFF; - def #NAME#16rr : BinOpRR_RFF; - def #NAME#32rr : BinOpRR_RFF; - def #NAME#64rr : BinOpRR_RFF; + def NAME#8rr : BinOpRR_RFF; + def NAME#16rr : BinOpRR_RFF; + def NAME#32rr : BinOpRR_RFF; + def NAME#64rr : BinOpRR_RFF; } // isCommutable - def #NAME#8rr_REV : BinOpRR_Rev; - def #NAME#16rr_REV : BinOpRR_Rev; - def #NAME#32rr_REV : BinOpRR_Rev; - def #NAME#64rr_REV : BinOpRR_Rev; + def NAME#8rr_REV : BinOpRR_Rev; + def NAME#16rr_REV : BinOpRR_Rev; + def NAME#32rr_REV : BinOpRR_Rev; + def NAME#64rr_REV : BinOpRR_Rev; - def #NAME#8rm : BinOpRM_RFF; - def #NAME#16rm : BinOpRM_RFF; - def #NAME#32rm : BinOpRM_RFF; - def #NAME#64rm : BinOpRM_RFF; + def NAME#8rm : BinOpRM_RFF; + def NAME#16rm : BinOpRM_RFF; + def NAME#32rm : BinOpRM_RFF; + def NAME#64rm : BinOpRM_RFF; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { // NOTE: These are order specific, we want the ri8 forms to be listed // first so that they are slightly preferred to the ri forms. - def #NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, opnode, RegMRM>; - def #NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, opnode, RegMRM>; - def #NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, opnode, RegMRM>; - - def #NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>; - def #NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>; - def #NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>; - def #NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>; + def NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, opnode, RegMRM>; + def NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, opnode, RegMRM>; + def NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, opnode, RegMRM>; + + def NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>; + def NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>; + def NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>; + def NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>; } } // Constraints = "$src1 = $dst" - def #NAME#8mr : BinOpMR_RMW_FF; - def #NAME#16mr : BinOpMR_RMW_FF; - def #NAME#32mr : BinOpMR_RMW_FF; - def #NAME#64mr : BinOpMR_RMW_FF; + def NAME#8mr : BinOpMR_RMW_FF; + def NAME#16mr : BinOpMR_RMW_FF; + def NAME#32mr : BinOpMR_RMW_FF; + def NAME#64mr : BinOpMR_RMW_FF; // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. 
- def #NAME#16mi8 : BinOpMI8_RMW_FF; - def #NAME#32mi8 : BinOpMI8_RMW_FF; - def #NAME#64mi8 : BinOpMI8_RMW_FF; - - def #NAME#8mi : BinOpMI_RMW_FF; - def #NAME#16mi : BinOpMI_RMW_FF; - def #NAME#32mi : BinOpMI_RMW_FF; - def #NAME#64mi32 : BinOpMI_RMW_FF; - - def #NAME#8i8 : BinOpAI; - def #NAME#16i16 : BinOpAI; - def #NAME#32i32 : BinOpAI; - def #NAME#64i32 : BinOpAI; + def NAME#16mi8 : BinOpMI8_RMW_FF; + def NAME#32mi8 : BinOpMI8_RMW_FF; + def NAME#64mi8 : BinOpMI8_RMW_FF; + + def NAME#8mi : BinOpMI_RMW_FF; + def NAME#16mi : BinOpMI_RMW_FF; + def NAME#32mi : BinOpMI_RMW_FF; + def NAME#64mi32 : BinOpMI_RMW_FF; + + def NAME#8i8 : BinOpAI; + def NAME#16i16 : BinOpAI; + def NAME#32i32 : BinOpAI; + def NAME#64i32 : BinOpAI; } } @@ -1072,59 +1072,59 @@ multiclass ArithBinOp_F BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, let Defs = [EFLAGS] in { let isCommutable = CommutableRR, isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def #NAME#8rr : BinOpRR_F; - def #NAME#16rr : BinOpRR_F; - def #NAME#32rr : BinOpRR_F; - def #NAME#64rr : BinOpRR_F; + def NAME#8rr : BinOpRR_F; + def NAME#16rr : BinOpRR_F; + def NAME#32rr : BinOpRR_F; + def NAME#64rr : BinOpRR_F; } // isCommutable - def #NAME#8rr_REV : BinOpRR_F_Rev; - def #NAME#16rr_REV : BinOpRR_F_Rev; - def #NAME#32rr_REV : BinOpRR_F_Rev; - def #NAME#64rr_REV : BinOpRR_F_Rev; + def NAME#8rr_REV : BinOpRR_F_Rev; + def NAME#16rr_REV : BinOpRR_F_Rev; + def NAME#32rr_REV : BinOpRR_F_Rev; + def NAME#64rr_REV : BinOpRR_F_Rev; - def #NAME#8rm : BinOpRM_F; - def #NAME#16rm : BinOpRM_F; - def #NAME#32rm : BinOpRM_F; - def #NAME#64rm : BinOpRM_F; + def NAME#8rm : BinOpRM_F; + def NAME#16rm : BinOpRM_F; + def NAME#32rm : BinOpRM_F; + def NAME#64rm : BinOpRM_F; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { // NOTE: These are order specific, we want the ri8 forms to be listed // first so that they are slightly preferred to the ri forms. - def #NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, opnode, RegMRM>; - def #NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>; - def #NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>; - - def #NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>; - def #NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>; - def #NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>; - def #NAME#64ri32: BinOpRI_F<0x80, mnemonic, Xi64, opnode, RegMRM>; + def NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, opnode, RegMRM>; + def NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>; + def NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>; + + def NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>; + def NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>; + def NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>; + def NAME#64ri32: BinOpRI_F<0x80, mnemonic, Xi64, opnode, RegMRM>; } - def #NAME#8mr : BinOpMR_F; - def #NAME#16mr : BinOpMR_F; - def #NAME#32mr : BinOpMR_F; - def #NAME#64mr : BinOpMR_F; + def NAME#8mr : BinOpMR_F; + def NAME#16mr : BinOpMR_F; + def NAME#32mr : BinOpMR_F; + def NAME#64mr : BinOpMR_F; // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. 
- def #NAME#16mi8 : BinOpMI8_F; - def #NAME#32mi8 : BinOpMI8_F; - def #NAME#64mi8 : BinOpMI8_F; - - def #NAME#8mi : BinOpMI_F; - def #NAME#16mi : BinOpMI_F; - def #NAME#32mi : BinOpMI_F; - def #NAME#64mi32 : BinOpMI_F; - - def #NAME#8i8 : BinOpAI; - def #NAME#16i16 : BinOpAI; - def #NAME#32i32 : BinOpAI; - def #NAME#64i32 : BinOpAI; + def NAME#16mi8 : BinOpMI8_F; + def NAME#32mi8 : BinOpMI8_F; + def NAME#64mi8 : BinOpMI8_F; + + def NAME#8mi : BinOpMI_F; + def NAME#16mi : BinOpMI_F; + def NAME#32mi : BinOpMI_F; + def NAME#64mi32 : BinOpMI_F; + + def NAME#8i8 : BinOpAI; + def NAME#16i16 : BinOpAI; + def NAME#32i32 : BinOpAI; + def NAME#64i32 : BinOpAI; } } diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td index adeaf5410d..8f2d0a1aae 100644 --- a/lib/Target/X86/X86InstrCMovSetCC.td +++ b/lib/Target/X86/X86InstrCMovSetCC.td @@ -17,19 +17,19 @@ multiclass CMOV opc, string Mnemonic, PatLeaf CondNode> { let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", isCommutable = 1 in { - def #NAME#16rr + def NAME#16rr : I,TB,OpSize; - def #NAME#32rr + def NAME#32rr : I, TB; - def #NAME#64rr + def NAME#64rr :RI opc, string Mnemonic, PatLeaf CondNode> { } let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in { - def #NAME#16rm + def NAME#16rm : I, TB, OpSize; - def #NAME#32rm + def NAME#32rm : I, TB; - def #NAME#64rm + def NAME#64rm :RI { let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in { - def #NAME#8 : I<0, Pseudo, (outs GR8:$dst), - (ins i8mem:$ptr, GR8:$val), - !strconcat(mnemonic, "8 PSEUDO!"), []>; - def #NAME#16 : I<0, Pseudo,(outs GR16:$dst), - (ins i16mem:$ptr, GR16:$val), - !strconcat(mnemonic, "16 PSEUDO!"), []>; - def #NAME#32 : I<0, Pseudo, (outs GR32:$dst), - (ins i32mem:$ptr, GR32:$val), - !strconcat(mnemonic, "32 PSEUDO!"), []>; - def #NAME#64 : I<0, Pseudo, (outs GR64:$dst), - (ins i64mem:$ptr, GR64:$val), - !strconcat(mnemonic, "64 PSEUDO!"), []>; + def NAME#8 : I<0, Pseudo, (outs GR8:$dst), + (ins i8mem:$ptr, GR8:$val), + !strconcat(mnemonic, "8 PSEUDO!"), []>; + def NAME#16 : I<0, Pseudo,(outs GR16:$dst), + (ins i16mem:$ptr, GR16:$val), + !strconcat(mnemonic, "16 PSEUDO!"), []>; + def NAME#32 : I<0, Pseudo, (outs GR32:$dst), + (ins i32mem:$ptr, GR32:$val), + !strconcat(mnemonic, "32 PSEUDO!"), []>; + def NAME#64 : I<0, Pseudo, (outs GR64:$dst), + (ins i64mem:$ptr, GR64:$val), + !strconcat(mnemonic, "64 PSEUDO!"), []>; } } @@ -560,9 +560,9 @@ defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">; multiclass PSEUDO_ATOMIC_LOAD_BINOP6432 { let usesCustomInserter = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0 in - def #NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - !strconcat(mnemonic, "6432 PSEUDO!"), []>; + def NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + !strconcat(mnemonic, "6432 PSEUDO!"), []>; } defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMAND">; @@ -604,77 +604,77 @@ multiclass LOCK_ArithBinOp RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8, Format ImmMod, string mnemonic> { let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { -def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, - RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 }, - MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), - !strconcat(mnemonic, "{b}\t", +def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, + RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 }, + MRMDestMem, (outs), (ins i8mem:$dst, 
GR8:$src2), + !strconcat(mnemonic, "{b}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_NONMEM>, LOCK; +def NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, + RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, + MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), + !strconcat(mnemonic, "{w}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_NONMEM>, OpSize, LOCK; +def NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, + RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, + MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), + !strconcat(mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_NONMEM>, LOCK; -def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, - RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, - MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), - !strconcat(mnemonic, "{w}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_NONMEM>, OpSize, LOCK; -def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, +def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, - MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), - !strconcat(mnemonic, "{l}\t", + MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), + !strconcat(mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_NONMEM>, LOCK; -def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, - RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, - MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - !strconcat(mnemonic, "{q}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_NONMEM>, LOCK; - -def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, - ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 }, - ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2), - !strconcat(mnemonic, "{b}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_MEM>, LOCK; - -def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, - ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, - ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2), - !strconcat(mnemonic, "{w}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_MEM>, OpSize, LOCK; - -def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, - ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, - ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2), - !strconcat(mnemonic, "{l}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_MEM>, LOCK; -def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, - ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, - ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2), - !strconcat(mnemonic, "{q}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_MEM>, LOCK; - -def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, - ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, - ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2), - !strconcat(mnemonic, "{w}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_MEM>, OpSize, LOCK; -def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, +def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, + ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 }, + ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2), + !strconcat(mnemonic, "{b}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_MEM>, LOCK; + +def NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, + ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, + ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2), + !strconcat(mnemonic, "{w}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_MEM>, OpSize, LOCK; + +def NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, + ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, + ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2), + 
!strconcat(mnemonic, "{l}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_MEM>, LOCK; + +def NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, + ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, + ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2), + !strconcat(mnemonic, "{q}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_MEM>, LOCK; + +def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, + ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, + ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2), + !strconcat(mnemonic, "{w}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_MEM>, OpSize, LOCK; +def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, + ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, + ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2), + !strconcat(mnemonic, "{l}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_MEM>, LOCK; +def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, - ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2), - !strconcat(mnemonic, "{l}\t", + ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2), + !strconcat(mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, LOCK; -def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, - ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, - ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2), - !strconcat(mnemonic, "{q}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_MEM>, LOCK; } @@ -691,18 +691,18 @@ multiclass LOCK_ArithUnOp Opc8, bits<8> Opc, Format Form, string mnemonic> { let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { -def #NAME#8m : I, LOCK; -def #NAME#16m : I, OpSize, LOCK; -def #NAME#32m : I, LOCK; +def NAME#16m : I, OpSize, LOCK; +def NAME#32m : I, LOCK; +def NAME#64m : RI, LOCK; -def #NAME#64m : RI, LOCK; } } @@ -714,9 +714,9 @@ multiclass LCMPXCHG_UnOp Opc, Format Form, string mnemonic, SDPatternOperator frag, X86MemOperand x86memop, InstrItinClass itin> { let isCodeGenOnly = 1 in { - def #NAME# : I, TB, LOCK; + def NAME : I, TB, LOCK; } } @@ -725,21 +725,21 @@ multiclass LCMPXCHG_BinOp Opc8, bits<8> Opc, Format Form, InstrItinClass itin8, InstrItinClass itin> { let isCodeGenOnly = 1 in { let Defs = [AL, EFLAGS], Uses = [AL] in - def #NAME#8 : I, TB, LOCK; + def NAME#8 : I, TB, LOCK; let Defs = [AX, EFLAGS], Uses = [AX] in - def #NAME#16 : I, TB, OpSize, LOCK; + def NAME#16 : I, TB, OpSize, LOCK; let Defs = [EAX, EFLAGS], Uses = [EAX] in - def #NAME#32 : I, TB, LOCK; + def NAME#32 : I, TB, LOCK; let Defs = [RAX, EFLAGS], Uses = [RAX] in - def #NAME#64 : RI, TB, LOCK; + def NAME#64 : RI, TB, LOCK; } } @@ -764,33 +764,33 @@ multiclass ATOMIC_LOAD_BINOP opc8, bits<8> opc, string mnemonic, string frag, InstrItinClass itin8, InstrItinClass itin> { let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in { - def #NAME#8 : I(frag # "_8") addr:$ptr, GR8:$val))], - itin8>; - def #NAME#16 : I(frag # "_16") addr:$ptr, GR16:$val))], - itin>, OpSize; - def #NAME#32 : I(frag # "_8") addr:$ptr, GR8:$val))], + itin8>; + def NAME#16 : I(frag # "_16") addr:$ptr, GR16:$val))], + itin>, OpSize; + def NAME#32 : I(frag # "_32") addr:$ptr, GR32:$val))], + itin>; + def NAME#64 : RI(frag # "_32") addr:$ptr, GR32:$val))], + GR64:$dst, + (!cast(frag # "_64") addr:$ptr, GR64:$val))], itin>; - def #NAME#64 : RI(frag # "_64") addr:$ptr, GR64:$val))], - itin>; } } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 4522d895a4..9ecf5e25c4 100644 --- 
a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1304,34 +1304,34 @@ def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2), multiclass ATOMIC_SWAP opc8, bits<8> opc, string mnemonic, string frag, InstrItinClass itin> { let Constraints = "$val = $dst" in { - def #NAME#8rm : I(frag # "_8") addr:$ptr, GR8:$val))], + itin>; + def NAME#16rm : I(frag # "_16") addr:$ptr, GR16:$val))], + itin>, OpSize; + def NAME#32rm : I(frag # "_32") addr:$ptr, GR32:$val))], + itin>; + def NAME#64rm : RI(frag # "_8") addr:$ptr, GR8:$val))], + GR64:$dst, + (!cast(frag # "_64") addr:$ptr, GR64:$val))], itin>; - def #NAME#16rm : I(frag # "_16") addr:$ptr, GR16:$val))], - itin>, OpSize; - def #NAME#32rm : I(frag # "_32") addr:$ptr, GR32:$val))], - itin>; - def #NAME#64rm : RI(frag # "_64") addr:$ptr, GR64:$val))], - itin>; } } -- cgit v1.2.3-18-g5258 From 3ebe59c892051375623fea55e977ff559fdb3323 Mon Sep 17 00:00:00 2001 From: Jordan Rose Date: Mon, 7 Jan 2013 19:00:49 +0000 Subject: Change SMRange to be half-open (exclusive end) instead of closed (inclusive) This is necessary not only for representing empty ranges, but for handling multibyte characters in the input. (If the end pointer in a range refers to a multibyte character, should it point to the beginning or the end of the character in a char array?) Some of the code in the asm parsers was already assuming this anyway. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171765 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 56 ++++++++++++++----------------- 1 file changed, 26 insertions(+), 30 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index cbdaeffc44..ca438eb491 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -463,7 +463,7 @@ struct X86Operand : public MCParsedAsmOperand { } static X86Operand *CreateToken(StringRef Str, SMLoc Loc) { - SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size() - 1); + SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size()); X86Operand *Res = new X86Operand(Token, Loc, EndLoc); Res->Tok.Data = Str.data(); Res->Tok.Length = Str.size(); @@ -558,10 +558,12 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo, Parser.Lex(); // Eat percent token. const AsmToken &Tok = Parser.getTok(); + EndLoc = Tok.getEndLoc(); + if (Tok.isNot(AsmToken::Identifier)) { if (isParsingIntelSyntax()) return true; return Error(StartLoc, "invalid register name", - SMRange(StartLoc, Tok.getEndLoc())); + SMRange(StartLoc, EndLoc)); } RegNo = MatchRegisterName(Tok.getString()); @@ -582,13 +584,12 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo, X86II::isX86_64ExtendedReg(RegNo)) return Error(StartLoc, "register %" + Tok.getString() + " is only available in 64-bit mode", - SMRange(StartLoc, Tok.getEndLoc())); + SMRange(StartLoc, EndLoc)); } // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens. if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) { RegNo = X86::ST0; - EndLoc = Tok.getLoc(); Parser.Lex(); // Eat 'st' // Check to see if we have '(4)' after %st. 
@@ -615,11 +616,13 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo, if (getParser().Lex().isNot(AsmToken::RParen)) return Error(Parser.getTok().getLoc(), "expected ')'"); - EndLoc = Tok.getLoc(); + EndLoc = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat ')' return false; } + EndLoc = Parser.getTok().getEndLoc(); + // If this is "db[0-7]", match it as an alias // for dr[0-7]. if (RegNo == 0 && Tok.getString().size() == 3 && @@ -636,7 +639,7 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo, } if (RegNo != 0) { - EndLoc = Tok.getLoc(); + EndLoc = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat it. return false; } @@ -645,10 +648,9 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo, if (RegNo == 0) { if (isParsingIntelSyntax()) return true; return Error(StartLoc, "invalid register name", - SMRange(StartLoc, Tok.getEndLoc())); + SMRange(StartLoc, EndLoc)); } - EndLoc = Tok.getEndLoc(); Parser.Lex(); // Eat identifier token. return false; } @@ -677,7 +679,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, unsigned Size) { unsigned BaseReg = 0, IndexReg = 0, Scale = 1; const AsmToken &Tok = Parser.getTok(); - SMLoc Start = Tok.getLoc(), End; + SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc(); const MCExpr *Disp = MCConstantExpr::Create(0, getContext()); // Parse [ BaseReg + Scale*IndexReg + Disp ] or [ symbol ] @@ -693,9 +695,9 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, // Handle '[' 'symbol' ']' if (getParser().ParseExpression(Disp, End)) return 0; if (getLexer().isNot(AsmToken::RBrac)) - return ErrorOperand(Start, "Expected ']' token!"); + return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!"); + End = Parser.getTok().getEndLoc(); Parser.Lex(); - End = Tok.getLoc(); return X86Operand::CreateMem(Disp, Start, End, Size); } } else if (getLexer().is(AsmToken::Integer)) { @@ -704,8 +706,8 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Loc = Tok.getLoc(); if (getLexer().is(AsmToken::RBrac)) { // Handle '[' number ']' + End = Parser.getTok().getEndLoc(); Parser.Lex(); - End = Tok.getLoc(); const MCExpr *Disp = MCConstantExpr::Create(Val, getContext()); if (SegReg) return X86Operand::CreateMem(SegReg, Disp, 0, 0, Scale, @@ -726,8 +728,8 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, bool ExpectRBrac = true; if (getLexer().is(AsmToken::RBrac)) { ExpectRBrac = false; + End = Parser.getTok().getEndLoc(); Parser.Lex(); - End = Tok.getLoc(); } if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus) || @@ -753,18 +755,18 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, return ErrorOperand(PlusLoc, "unexpected token after +"); } else if (getLexer().is(AsmToken::Identifier)) { // This could be an index register or a displacement expression. - End = Tok.getLoc(); if (!IndexReg) ParseRegister(IndexReg, Start, End); - else if (getParser().ParseExpression(Disp, End)) return 0; + else if (getParser().ParseExpression(Disp, End)) + return 0; } } // Parse ][ as a plus. 
if (getLexer().is(AsmToken::RBrac)) { ExpectRBrac = false; + End = Parser.getTok().getEndLoc(); Parser.Lex(); - End = Tok.getLoc(); if (getLexer().is(AsmToken::LBrac)) { ExpectRBrac = true; Parser.Lex(); @@ -772,15 +774,15 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, return 0; } } else if (ExpectRBrac) { - if (getParser().ParseExpression(Disp, End)) - return 0; + if (getParser().ParseExpression(Disp, End)) + return 0; } if (ExpectRBrac) { if (getLexer().isNot(AsmToken::RBrac)) return ErrorOperand(End, "expected ']' token!"); + End = Parser.getTok().getEndLoc(); Parser.Lex(); - End = Tok.getLoc(); } // Parse the dot operator (e.g., [ebx].foo.bar). @@ -790,12 +792,11 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, if (ParseIntelDotOperator(Disp, &NewDisp, Err)) return ErrorOperand(Tok.getLoc(), Err); + End = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat the field. Disp = NewDisp; } - End = Tok.getLoc(); - // handle [-42] if (!BaseReg && !IndexReg) return X86Operand::CreateMem(Disp, Start, End, Size); @@ -831,8 +832,8 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { } const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); - if (getParser().ParseExpression(Disp, End)) return 0; - End = Parser.getTok().getLoc(); + if (getParser().ParseExpression(Disp, End)) + return 0; bool NeedSizeDir = false; if (!Size && isParsingInlineAsm()) { @@ -921,8 +922,6 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) { if (getParser().ParseExpression(Val, End)) return ErrorOperand(Start, "Unable to parse expression!"); - End = Parser.getTok().getLoc(); - // Don't emit the offset operator. InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7)); @@ -947,8 +946,6 @@ X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) { if (getParser().ParseExpression(Val, End)) return 0; - End = Parser.getTok().getLoc(); - unsigned Size = 0; if (const MCSymbolRefExpr *SymRef = dyn_cast(Val)) { const MCSymbol &Sym = SymRef->getSymbol(); @@ -995,7 +992,6 @@ X86Operand *X86AsmParser::ParseIntelOperand() { getLexer().is(AsmToken::Minus)) { const MCExpr *Val; if (!getParser().ParseExpression(Val, End)) { - End = Parser.getTok().getLoc(); return X86Operand::CreateImm(Val, Start, End); } } @@ -1006,7 +1002,7 @@ X86Operand *X86AsmParser::ParseIntelOperand() { // If this is a segment register followed by a ':', then this is the start // of a memory reference, otherwise this is a normal register reference. if (getLexer().isNot(AsmToken::Colon)) - return X86Operand::CreateReg(RegNo, Start, Parser.getTok().getLoc()); + return X86Operand::CreateReg(RegNo, Start, End); getParser().Lex(); // Eat the colon. return ParseIntelMemOperand(RegNo, Start); @@ -1183,7 +1179,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { Error(Parser.getTok().getLoc(), "unexpected token in memory operand"); return 0; } - SMLoc MemEnd = Parser.getTok().getLoc(); + SMLoc MemEnd = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat the ')'. // If we have both a base register and an index register make sure they are -- cgit v1.2.3-18-g5258 From 251040bc18eedfa56d01fe92836e55cfd8c5d990 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Tue, 8 Jan 2013 00:22:56 +0000 Subject: Renamed MCInstFragment to MCRelaxableFragment and added some comments. No change in functionality. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171822 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 5f2c75ed55..acc90eceba 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -113,7 +113,7 @@ public: bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCInstFragment *DF, + const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const; void relaxInstruction(const MCInst &Inst, MCInst &Res) const; @@ -255,7 +255,7 @@ bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst) const { bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCInstFragment *DF, + const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const { // Relax if the value is too big for a (signed) i8. return int64_t(Value) != int64_t(int8_t(Value)); -- cgit v1.2.3-18-g5258 From c7b902e7fe3498503efbfd98cabb1b1c67cadda6 Mon Sep 17 00:00:00 2001 From: Preston Gurd Date: Tue, 8 Jan 2013 18:27:24 +0000 Subject: Pad Short Functions for Intel Atom The current Intel Atom microarchitecture has a feature whereby when a function returns early then it is slightly faster to execute a sequence of NOP instructions to wait until the return address is ready, as opposed to simply stalling on the ret instruction until the return address is ready. When compiling for X86 Atom only, this patch will run a pass, called "X86PadShortFunction" which will add NOP instructions where less than four cycles elapse between function entry and return. It includes tests. This patch has been updated to address Nadav's review comments - Optimize only at >= O1 and don't do optimization if -Os is set - Stores MachineBasicBlock* instead of BBNum - Uses DenseMap instead of std::map - Fixes placement of braces Patch by Andy Zhang. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171879 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/CMakeLists.txt | 1 + lib/Target/X86/X86.h | 4 + lib/Target/X86/X86.td | 9 +- lib/Target/X86/X86PadShortFunction.cpp | 171 +++++++++++++++++++++++++++++++++ lib/Target/X86/X86Subtarget.cpp | 1 + lib/Target/X86/X86Subtarget.h | 5 + lib/Target/X86/X86TargetMachine.cpp | 6 ++ 7 files changed, 194 insertions(+), 3 deletions(-) create mode 100644 lib/Target/X86/X86PadShortFunction.cpp (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 95f1f22e04..d14899d28a 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -25,6 +25,7 @@ set(sources X86JITInfo.cpp X86MCInstLower.cpp X86MachineFunctionInfo.cpp + X86PadShortFunction.cpp X86RegisterInfo.cpp X86SelectionDAGInfo.cpp X86Subtarget.cpp diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 2bff7ab15a..1f9919f159 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -66,6 +66,10 @@ FunctionPass *createEmitX86CodeToMemory(); /// \brief Creates an X86-specific Target Transformation Info pass. ImmutablePass *createX86TargetTransformInfoPass(const X86TargetMachine *TM); +/// createX86PadShortFunctions - Return a pass that pads short functions +/// with NOOPs. This will prevent a stall when returning on the Atom. 
+FunctionPass *createX86PadShortFunctions(); + } // End llvm namespace #endif diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index b9d8cf7645..3ab2899365 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -123,8 +123,11 @@ def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true", def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb", - "HasSlowDivide", "true", - "Use small divide for positive values less than 256">; + "HasSlowDivide", "true", + "Use small divide for positive values less than 256">; +def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions", + "PadShortFunctions", "true", + "Pad short functions">; //===----------------------------------------------------------------------===// // X86 processors supported. @@ -167,7 +170,7 @@ def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>; def : AtomProc<"atom", [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B, FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP, - FeatureSlowDivide]>; + FeatureSlowDivide, FeaturePadShortFunctions]>; // "Arrandale" along with corei3 and corei5 def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem, FeatureFastUAMem, diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp new file mode 100644 index 0000000000..c22872f128 --- /dev/null +++ b/lib/Target/X86/X86PadShortFunction.cpp @@ -0,0 +1,171 @@ +//===-------- X86PadShortFunction.cpp - pad short functions -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the pass which will pad short functions to prevent +// a stall if a function returns before the return address is ready. This +// is needed for some Intel Atom processors. 
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+
+#define DEBUG_TYPE "x86-pad-short-functions"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+STATISTIC(NumBBsPadded, "Number of basic blocks padded");
+
+namespace {
+  struct PadShortFunc : public MachineFunctionPass {
+    static char ID;
+    PadShortFunc() : MachineFunctionPass(ID)
+                   , Threshold(4), TM(0), TII(0) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual const char *getPassName() const {
+      return "X86 Atom pad short functions";
+    }
+
+  private:
+    void findReturns(MachineBasicBlock *MBB,
+                     unsigned int Cycles = 0);
+
+    bool cyclesUntilReturn(MachineBasicBlock *MBB,
+                           unsigned int &Cycles,
+                           MachineBasicBlock::iterator *Location = 0);
+
+    void addPadding(MachineBasicBlock *MBB,
+                    MachineBasicBlock::iterator &MBBI,
+                    unsigned int NOOPsToAdd);
+
+    const unsigned int Threshold;
+    DenseMap<MachineBasicBlock*, unsigned int> ReturnBBs;
+
+    const TargetMachine *TM;
+    const TargetInstrInfo *TII;
+  };
+
+  char PadShortFunc::ID = 0;
+}
+
+FunctionPass *llvm::createX86PadShortFunctions() {
+  return new PadShortFunc();
+}
+
+/// runOnMachineFunction - Loop over all of the basic blocks, inserting
+/// NOOP instructions before early exits.
+bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
+  bool OptForSize = MF.getFunction()->getAttributes().
+    hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+
+  if (OptForSize)
+    return false;
+
+  TM = &MF.getTarget();
+  TII = TM->getInstrInfo();
+
+  // Search through basic blocks and mark the ones that have early returns
+  ReturnBBs.clear();
+  findReturns(MF.begin());
+
+  bool MadeChange = false;
+
+  MachineBasicBlock::iterator ReturnLoc;
+  MachineBasicBlock *MBB;
+  unsigned int Cycles = 0;
+  unsigned int BBCycles;
+
+  // Pad the identified basic blocks with NOOPs
+  for (DenseMap<MachineBasicBlock*, unsigned int>::iterator I = ReturnBBs.begin();
+       I != ReturnBBs.end(); ++I) {
+    MBB = I->first;
+    Cycles = I->second;
+
+    if (Cycles < Threshold) {
+      if (!cyclesUntilReturn(MBB, BBCycles, &ReturnLoc))
+        continue;
+
+      addPadding(MBB, ReturnLoc, Threshold - Cycles);
+      NumBBsPadded++;
+      MadeChange = true;
+    }
+  }
+
+  return MadeChange;
+}
+
+/// findReturns - Starting at MBB, follow control flow and add all
+/// basic blocks that contain a return to ReturnBBs.
+void PadShortFunc::findReturns(MachineBasicBlock *MBB, unsigned int Cycles) {
+  // If this BB has a return, note how many cycles it takes to get there.
+  bool hasReturn = cyclesUntilReturn(MBB, Cycles);
+  if (Cycles >= Threshold)
+    return;
+
+  if (hasReturn) {
+    ReturnBBs[MBB] = std::max(ReturnBBs[MBB], Cycles);
+    return;
+  }
+
+  // Follow branches in BB and look for returns
+  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin();
+       I != MBB->succ_end(); ++I) {
+    findReturns(*I, Cycles);
+  }
+}
+
+/// cyclesUntilReturn - If the MBB has a return instruction, set Location
+/// to the instruction and return true. Return false otherwise.
+/// Cycles will be incremented by the number of cycles taken to reach the
+/// return or the end of the BB, whichever occurs first.
+bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB, + unsigned int &Cycles, + MachineBasicBlock::iterator *Location) { + for (MachineBasicBlock::iterator MBBI = MBB->begin(); + MBBI != MBB->end(); ++MBBI) { + MachineInstr *MI = MBBI; + // Mark basic blocks with a return instruction. Calls to other + // functions do not count because the called function will be padded, + // if necessary. + if (MI->isReturn() && !MI->isCall()) { + if (Location) + *Location = MBBI; + return true; + } + + Cycles += TII->getInstrLatency(TM->getInstrItineraryData(), MI); + } + + return false; +} + +/// addPadding - Add the given number of NOOP instructions to the function +/// just prior to the return at MBBI +void PadShortFunc::addPadding(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &MBBI, + unsigned int NOOPsToAdd) { + DebugLoc DL = MBBI->getDebugLoc(); + + while (NOOPsToAdd-- > 0) { + BuildMI(*MBB, MBBI, DL, TII->get(X86::NOOP)); + BuildMI(*MBB, MBBI, DL, TII->get(X86::NOOP)); + } +} diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index d493b78752..53c28f4fce 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -350,6 +350,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, , UseLeaForSP(false) , HasSlowDivide(false) , PostRAScheduler(false) + , PadShortFunctions(false) , stackAlignment(4) // FIXME: this is a known good value for Yonah. How about others? , MaxInlineSizeThreshold(128) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 44f38a1a91..080f4cfeca 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -146,6 +146,10 @@ protected: /// PostRAScheduler - True if using post-register-allocation scheduler. bool PostRAScheduler; + /// PadShortFunctions - True if the short functions should be padded to prevent + /// a stall when returning too early. + bool PadShortFunctions; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -231,6 +235,7 @@ public: bool hasCmpxchg16b() const { return HasCmpxchg16b; } bool useLeaForSP() const { return UseLeaForSP; } bool hasSlowDivide() const { return HasSlowDivide; } + bool padShortFunctions() const { return PadShortFunctions; } bool isAtom() const { return X86ProcFamily == IntelAtom; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 847e06b960..706e64a09f 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -202,6 +202,12 @@ bool X86PassConfig::addPreEmitPass() { ShouldPrint = true; } + if (getOptLevel() != CodeGenOpt::None && + getX86Subtarget().padShortFunctions()) { + addPass(createX86PadShortFunctions()); + ShouldPrint = true; + } + return ShouldPrint; } -- cgit v1.2.3-18-g5258 From 83be7b0dd3ae9a3cb22d36ae4c1775972553b94b Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 9 Jan 2013 01:15:42 +0000 Subject: Cost Model: Move the 'max unroll factor' variable to the TTI and add initial Cost Model support on ARM. 
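For illustration, a client pass could bound its unroll factor with the new hook
roughly as follows (a hypothetical caller sketch, not code from this patch; the
helper name is invented):

    #include <algorithm> // for std::min

    // Clamp a requested unroll factor to the target's preferred maximum, as
    // reported by the getMaximumUnrollFactor() hook added below (1 on Atom,
    // 4 when AVX is available, otherwise 2).
    unsigned clampUnrollFactor(const TargetTransformInfo &TTI,
                               unsigned Requested) {
      return std::min(Requested, TTI.getMaximumUnrollFactor());
    }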
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171928 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetTransformInfo.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 9cc1b180e9..6ab08cbd12 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -75,7 +75,6 @@ public: /// \name Scalar TTI Implementations /// @{ - virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const; /// @} @@ -84,6 +83,7 @@ public: /// @{ virtual unsigned getNumberOfRegisters(bool Vector) const; + virtual unsigned getMaximumUnrollFactor() const; virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const; @@ -156,7 +156,6 @@ FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len, return -1; } - X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); // TODO: Currently the __builtin_popcount() implementation using SSE3 @@ -171,6 +170,18 @@ unsigned X86TTI::getNumberOfRegisters(bool Vector) const { return 8; } +unsigned X86TTI::getMaximumUnrollFactor() const { + if (ST->isAtom()) + return 1; + + // Sandybridge and Haswell have multiple execution ports and pipelined + // vector units. + if (ST->hasAVX()) + return 4; + + return 2; +} + unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { // Legalize the type. std::pair LT = TLI->getTypeLegalizationCost(Ty); -- cgit v1.2.3-18-g5258 From 68ca56285f9b6e82eb16ff8ea02a301f2c489fae Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 9 Jan 2013 01:57:54 +0000 Subject: These functions have default arguments of 0 for the last arg. Use them. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171933 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86AsmPrinter.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 5b3e0ba22d..75fa9d2074 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -543,7 +543,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { MCSA_IndirectSymbol); // hlt; hlt; hlt; hlt; hlt hlt = 0xf4. const char HltInsts[] = "\xf4\xf4\xf4\xf4\xf4"; - OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/); + OutStreamer.EmitBytes(StringRef(HltInsts, 5)); } Stubs.clear(); @@ -569,7 +569,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { // .long 0 if (MCSym.getInt()) // External to current translation unit. - OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); + OutStreamer.EmitIntValue(0, 4/*size*/); else // Internal to current translation unit. // @@ -578,8 +578,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { // using NLPs. However, sometimes the types are local to the file. So // we need to fill in the value for the NLP in those cases. 
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), - OutContext), - 4/*size*/, 0/*addrspace*/); + OutContext), 4/*size*/); } Stubs.clear(); OutStreamer.AddBlankLine(); @@ -596,8 +595,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { // .long _foo OutStreamer.EmitValue(MCSymbolRefExpr:: Create(Stubs[i].second.getPointer(), - OutContext), - 4/*size*/, 0/*addrspace*/); + OutContext), 4/*size*/); } Stubs.clear(); OutStreamer.AddBlankLine(); @@ -663,7 +661,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { name += ",DATA"; else name += ",data"; - OutStreamer.EmitBytes(name, 0); + OutStreamer.EmitBytes(name); } for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) { @@ -672,7 +670,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { else name = " -export:"; name += DLLExportedFns[i]->getName(); - OutStreamer.EmitBytes(name, 0); + OutStreamer.EmitBytes(name); } } } @@ -692,7 +690,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { OutStreamer.EmitLabel(Stubs[i].first); OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), - TD->getPointerSize(), 0); + TD->getPointerSize()); } Stubs.clear(); } -- cgit v1.2.3-18-g5258 From 47579cf390c42e0577519e0a2b6044baece9df00 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Wed, 9 Jan 2013 03:36:49 +0000 Subject: MIsched: add an ILP window property to machine model. This was an experimental option, but needs to be defined per-target. e.g. PPC A2 needs to aggressively hide latency. I converted some in-order scheduling tests to A2. Hal is working on more test cases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171946 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86Schedule.td | 5 +++++ lib/Target/X86/X86ScheduleAtom.td | 1 + 2 files changed, 6 insertions(+) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index c14407f9ac..d99d085298 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -470,12 +470,17 @@ def IIC_NOP : InstrItinClass; // latencies. Since these latencies are not used for pipeline hazards, // they do not need to be exact. // +// ILPWindow=10 is an arbitrary threshold that approximates cycles of +// latency hidden by instruction buffers. The actual value is not very +// important but should be zero for inorder and nonzero for OOO processors. +// // The GenericModel contains no instruciton itineraries. def GenericModel : SchedMachineModel { let IssueWidth = 4; let MinLatency = 0; let LoadLatency = 4; let HighLatency = 10; + let ILPWindow = 10; } include "X86ScheduleAtom.td" diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index 87102614cc..1e5f2d6c9a 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -525,6 +525,7 @@ def AtomModel : SchedMachineModel { // OperandCycles may be used for expected latency. let LoadLatency = 3; // Expected cycles, may be overriden by OperandCycles. let HighLatency = 30;// Expected, may be overriden by OperandCycles. + let ILPWindow = 0; // Always try to hide expected latency. let Itineraries = AtomItineraries; } -- cgit v1.2.3-18-g5258 From 1ced208be9cab0f994c5df9000da36bc313b2507 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 9 Jan 2013 03:52:05 +0000 Subject: Last in the series of removing unnecessary '0' arguments for address space. Reordered the EmitULEB128IntValue arguments to make this easier. 
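[Editor's note on the ILPWindow property added in the MIsched commit above: per the X86Schedule.td comment, it approximates the cycles of latency hidden by instruction buffers (zero for in-order cores such as Atom, nonzero for out-of-order ones). A toy sketch of how a scheduling heuristic might consult such a window; the function is hypothetical, not the actual MISched code.]

    // Stalls shorter than the window are assumed hidden by the core's
    // buffers; only longer ones are worth scheduling around.
    bool stallNeedsHiding(unsigned StallCycles, unsigned ILPWindow) {
      return StallCycles > ILPWindow;
    }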
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171949 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index ca438eb491..5007803d93 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -2017,7 +2017,7 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { if (getParser().ParseExpression(Value)) return true; - getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/); + getParser().getStreamer().EmitValue(Value, Size); if (getLexer().is(AsmToken::EndOfStatement)) break; -- cgit v1.2.3-18-g5258 From 13f8cf55d43980e73d6cbb8f4894607709daa311 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 9 Jan 2013 05:14:33 +0000 Subject: Efficient lowering of vector sdiv when the divisor is a splatted power of two constant. PR 14848. The lowered sequence is based on the existing sequence the target-independent DAG Combiner creates for the scalar case. Patch by Zvi Rackover. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171953 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 50 ++++++++++++++++++++++++++++++++++++++ lib/Target/X86/X86ISelLowering.h | 1 + 2 files changed, 51 insertions(+) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4b00b46e73..f42884dd2e 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1047,6 +1047,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SRA, MVT::v4i32, Custom); } + setOperationAction(ISD::SDIV, MVT::v8i16, Custom); + setOperationAction(ISD::SDIV, MVT::v4i32, Custom); } if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) { @@ -1111,6 +1113,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SRA, MVT::v16i16, Custom); setOperationAction(ISD::SRA, MVT::v32i8, Custom); + setOperationAction(ISD::SDIV, MVT::v16i16, Custom); + setOperationAction(ISD::SETCC, MVT::v32i8, Custom); setOperationAction(ISD::SETCC, MVT::v16i16, Custom); setOperationAction(ISD::SETCC, MVT::v8i32, Custom); @@ -1166,6 +1170,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SHL, MVT::v8i32, Legal); setOperationAction(ISD::SRA, MVT::v8i32, Legal); + + setOperationAction(ISD::SDIV, MVT::v8i32, Custom); } else { setOperationAction(ISD::ADD, MVT::v4i64, Custom); setOperationAction(ISD::ADD, MVT::v8i32, Custom); @@ -11377,6 +11383,49 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo); } +SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + EVT EltTy = VT.getVectorElementType(); + unsigned NumElts = VT.getVectorNumElements(); + SDValue N0 = Op.getOperand(0); + DebugLoc dl = Op.getDebugLoc(); + + // Lower sdiv X, pow2-const. 
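[Editor's note, not part of the patch: per lane, the sequence built below evaluates x / 2^k rounded toward zero, and negates the result if the splat divisor was negative. A scalar C++ model of the same steps, assuming 32-bit lanes, an arithmetic right shift on the host, and 1 <= k <= 31.]

    #include <cstdint>

    int32_t sdiv_pow2(int32_t x, unsigned k) {
      int32_t sgn = x >> 31;                      // splat the sign bit
      uint32_t bias = uint32_t(sgn) >> (32 - k);  // 2^k - 1 if x < 0, else 0
      return int32_t(uint32_t(x) + bias) >> k;    // shift now truncates toward zero
    }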
+ BuildVectorSDNode *C = dyn_cast(Op.getOperand(1)); + if (!C) + return SDValue(); + + APInt SplatValue, SplatUndef; + unsigned MinSplatBits; + bool HasAnyUndefs; + if (!C->isConstantSplat(SplatValue, SplatUndef, MinSplatBits, HasAnyUndefs)) + return SDValue(); + + if ((SplatValue != 0) && + (SplatValue.isPowerOf2() || (-SplatValue).isPowerOf2())) { + unsigned lg2 = SplatValue.countTrailingZeros(); + // Splat the sign bit. + SDValue Sz = DAG.getConstant(EltTy.getSizeInBits()-1, MVT::i32); + SDValue SGN = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, N0, Sz, DAG); + // Add (N0 < 0) ? abs2 - 1 : 0; + SDValue Amt = DAG.getConstant(EltTy.getSizeInBits() - lg2, MVT::i32); + SDValue SRL = getTargetVShiftNode(X86ISD::VSRLI, dl, VT, SGN, Amt, DAG); + SDValue ADD = DAG.getNode(ISD::ADD, dl, VT, N0, SRL); + SDValue Lg2Amt = DAG.getConstant(lg2, MVT::i32); + SDValue SRA = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, ADD, Lg2Amt, DAG); + + // If we're dividing by a positive value, we're done. Otherwise, we must + // negate the result. + if (SplatValue.isNonNegative()) + return SRA; + + SmallVector V(NumElts, DAG.getConstant(0, EltTy)); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], NumElts); + return DAG.getNode(ISD::SUB, dl, VT, Zero, SRA); + } + return SDValue(); +} + SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); @@ -12033,6 +12082,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); case ISD::ADD: return LowerADD(Op, DAG); case ISD::SUB: return LowerSUB(Op, DAG); + case ISD::SDIV: return LowerSDIV(Op, DAG); } } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 16ce364cd5..35b5abd791 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -841,6 +841,7 @@ namespace llvm { SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; -- cgit v1.2.3-18-g5258 From 14925e6b885f8bd8cf448627386d412831f4bf1b Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 9 Jan 2013 22:29:00 +0000 Subject: ARM Cost model: Use the size of vector registers and widest vectorizable instruction to determine the max vectorization factor. 
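[Editor's note: the X86 diff below adds getRegisterBitWidth(), reporting 256-bit vector registers with AVX, 128-bit with SSE, and 0 when vectors are unsupported. A sketch of the relationship described in the subject above; the function is hypothetical, and only the hook names come from the patches.]

    // Widest sensible vectorization factor: vector register width
    // divided by the widest scalar the loop touches, e.g. 256/32 = 8.
    unsigned maxVectorFactor(unsigned RegisterBitWidth,
                             unsigned WidestScalarBits) {
      return WidestScalarBits ? RegisterBitWidth / WidestScalarBits : 1;
    }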
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172010 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetTransformInfo.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 6ab08cbd12..675c896d70 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -83,6 +83,7 @@ public: /// @{ virtual unsigned getNumberOfRegisters(bool Vector) const; + virtual unsigned getRegisterBitWidth(bool Vector) const; virtual unsigned getMaximumUnrollFactor() const; virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, @@ -165,11 +166,27 @@ X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const { } unsigned X86TTI::getNumberOfRegisters(bool Vector) const { + if (Vector && !ST->hasSSE1()) + return 0; + if (ST->is64Bit()) return 16; return 8; } +unsigned X86TTI::getRegisterBitWidth(bool Vector) const { + if (Vector) { + if (ST->hasAVX()) return 256; + if (ST->hasSSE1()) return 128; + return 0; + } + + if (ST->is64Bit()) + return 64; + return 32; + +} + unsigned X86TTI::getMaximumUnrollFactor() const { if (ST->isAtom()) return 1; -- cgit v1.2.3-18-g5258 From c1ec207b615cb058d30dc642ee311ed06fe59cfe Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 10 Jan 2013 22:10:27 +0000 Subject: [ms-inline asm] Add support for calling functions from inline assembly. Part of rdar://12991541 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172121 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 35 ++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 5007803d93..d26254bbd8 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -168,6 +168,7 @@ struct X86Operand : public MCParsedAsmOperand { SMLoc StartLoc, EndLoc; SMLoc OffsetOfLoc; + bool AddressOf; union { struct { @@ -340,6 +341,10 @@ struct X86Operand : public MCParsedAsmOperand { return OffsetOfLoc.getPointer(); } + bool needAddressOf() const { + return AddressOf; + } + bool needSizeDirective() const { assert(Kind == Memory && "Invalid access!"); return Mem.NeedSizeDir; @@ -471,9 +476,11 @@ struct X86Operand : public MCParsedAsmOperand { } static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc, + bool AddressOf = false, SMLoc OffsetOfLoc = SMLoc()) { X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc); Res->Reg.RegNo = RegNo; + Res->AddressOf = AddressOf; Res->OffsetOfLoc = OffsetOfLoc; return Res; } @@ -836,24 +843,39 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { return 0; bool NeedSizeDir = false; - if (!Size && isParsingInlineAsm()) { + bool IsVarDecl = false; + if (isParsingInlineAsm()) { if (const MCSymbolRefExpr *SymRef = dyn_cast(Disp)) { const MCSymbol &Sym = SymRef->getSymbol(); // FIXME: The SemaLookup will fail if the name is anything other then an // identifier. // FIXME: Pass a valid SMLoc. 
- SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size); + unsigned tSize; + SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tSize, + IsVarDecl); + if (!Size) + Size = tSize; NeedSizeDir = Size > 0; } } if (!isParsingInlineAsm()) return X86Operand::CreateMem(Disp, Start, End, Size); - else + else { + // If this is not a VarDecl then assume it is a FuncDecl or some other label + // reference. We need an 'r' constraint here, so we need to create register + // operand to ensure proper matching. Just pick a GPR based on the size of + // a pointer. + if (!IsVarDecl) { + unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; + return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true); + } + // When parsing inline assembly we set the base register to a non-zero value // as we don't know the actual value at this time. This is necessary to // get the matching correct in some cases. return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0, /*Scale*/1, Start, End, Size, NeedSizeDir); + } } /// Parse the '.' operator. @@ -929,7 +951,8 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) { // register operand to ensure proper matching. Just pick a GPR based on // the size of a pointer. unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; - return X86Operand::CreateReg(RegNo, Start, End, OffsetOfLoc); + return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true, + OffsetOfLoc); } /// Parse the 'TYPE' operator. The TYPE operator returns the size of a C or @@ -952,7 +975,9 @@ X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) { // FIXME: The SemaLookup will fail if the name is anything other then an // identifier. // FIXME: Pass a valid SMLoc. - if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size)) + bool IsVarDecl; + if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size, + IsVarDecl)) return ErrorOperand(Start, "Unable to lookup TYPE of expr!"); Size /= 8; // Size is in terms of bits, but we want bytes in the context. -- cgit v1.2.3-18-g5258 From 7109fbe98260d7a574762f5b9920d3d8e99052e3 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 10 Jan 2013 23:39:07 +0000 Subject: [ms-inline asm] Make sure we set a default value for AddressOf. Follow on to r172121. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172148 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index d26254bbd8..8545a56caf 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -495,7 +495,7 @@ struct X86Operand : public MCParsedAsmOperand { /// Create an absolute memory operand. 
static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0, bool NeedSizeDir = false){
+ unsigned Size = 0, bool NeedSizeDir = false) {
X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
@@ -504,6 +504,7 @@
Res->Mem.Scale = 1;
Res->Mem.Size = Size;
Res->Mem.NeedSizeDir = NeedSizeDir;
+ Res->AddressOf = false;
return Res;
}
-- cgit v1.2.3-18-g5258


From 805141e5043b7bd2fade62102c79b2beb2e3aec4 Mon Sep 17 00:00:00 2001
From: Jakub Staszak
Date: Thu, 10 Jan 2013 23:43:56 +0000
Subject: Remove heavy and unused #includes from X86TargetObjectFile.cpp.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172151 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Target/X86/X86TargetObjectFile.cpp | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index b8ee319291..871dacd6a1 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -8,16 +8,12 @@
//===----------------------------------------------------------------------===//

#include "X86TargetObjectFile.h"
-#include "X86TargetMachine.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Target/Mangler.h"
+
using namespace llvm;
using namespace dwarf;
-- cgit v1.2.3-18-g5258


From b789b949b60c9a28686e638f75b2640d16d7144e Mon Sep 17 00:00:00 2001
From: NAKAMURA Takumi
Date: Fri, 11 Jan 2013 01:13:54 +0000
Subject: X86AsmParser.cpp: Fix up r172148, to add initializer in another CreateMem().

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172157 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Target/X86/AsmParser/X86AsmParser.cpp | 1 +
1 file changed, 1 insertion(+)
(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 8545a56caf..5ce258ed0f 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -528,6 +528,7 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.Scale = Scale;
Res->Mem.Size = Size;
Res->Mem.NeedSizeDir = NeedSizeDir;
+ Res->AddressOf = false;
return Res;
}
};
-- cgit v1.2.3-18-g5258


From 1452d46e0bc5ca6bea77ca85abf9b694e3b6ab84 Mon Sep 17 00:00:00 2001
From: Preston Gurd
Date: Fri, 11 Jan 2013 22:06:56 +0000
Subject: Update patch for the pad short functions pass for Intel Atom (only).

Adds a check for -Oz, changes the code to not re-visit BBs, and skips over DBG_VALUE instrs.

Patch by Andy Zhang.
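[Editor's note: the central change in the diff below is the VisitedBBs cache, which memoizes per-block cycle counts so blocks reached along re-convergent CFG paths are not rescanned. A self-contained toy of the same shape; the types are simplified stand-ins for machine basic blocks and instruction latencies.]

    #include <map>
    #include <vector>

    struct Inst { bool IsReturn; unsigned Latency; };
    struct BBInfo { bool HasReturn; unsigned Cycles; };

    BBInfo cyclesUntilReturn(unsigned BB,
                             const std::vector<std::vector<Inst>> &Blocks,
                             std::map<unsigned, BBInfo> &Visited) {
      auto It = Visited.find(BB);
      if (It != Visited.end())
        return It->second;                 // cache hit: block already scanned
      BBInfo Info = {false, 0};
      for (const Inst &I : Blocks[BB]) {
        if (I.IsReturn) { Info.HasReturn = true; break; }
        Info.Cycles += I.Latency;          // latency until return or block end
      }
      return Visited[BB] = Info;
    }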
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172258 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86PadShortFunction.cpp | 77 ++++++++++++++++++++++++++-------- 1 file changed, 59 insertions(+), 18 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp index c22872f128..83e75ea994 100644 --- a/lib/Target/X86/X86PadShortFunction.cpp +++ b/lib/Target/X86/X86PadShortFunction.cpp @@ -33,6 +33,19 @@ using namespace llvm; STATISTIC(NumBBsPadded, "Number of basic blocks padded"); namespace { + struct VisitedBBInfo { + // HasReturn - Whether the BB contains a return instruction + bool HasReturn; + + // Cycles - Number of cycles until return if HasReturn is true, otherwise + // number of cycles until end of the BB + unsigned int Cycles; + + VisitedBBInfo() : HasReturn(false), Cycles(0) {} + VisitedBBInfo(bool HasReturn, unsigned int Cycles) + : HasReturn(HasReturn), Cycles(Cycles) {} + }; + struct PadShortFunc : public MachineFunctionPass { static char ID; PadShortFunc() : MachineFunctionPass(ID) @@ -49,16 +62,21 @@ namespace { unsigned int Cycles = 0); bool cyclesUntilReturn(MachineBasicBlock *MBB, - unsigned int &Cycles, - MachineBasicBlock::iterator *Location = 0); + unsigned int &Cycles); void addPadding(MachineBasicBlock *MBB, MachineBasicBlock::iterator &MBBI, unsigned int NOOPsToAdd); const unsigned int Threshold; + + // ReturnBBs - Maps basic blocks that return to the minimum number of + // cycles until the return, starting from the entry block. DenseMap ReturnBBs; + // VisitedBBs - Cache of previously visited BBs. + DenseMap VisitedBBs; + const TargetMachine *TM; const TargetInstrInfo *TII; }; @@ -73,25 +91,26 @@ FunctionPass *llvm::createX86PadShortFunctions() { /// runOnMachineFunction - Loop over all of the basic blocks, inserting /// NOOP instructions before early exits. bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) { - bool OptForSize = MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); - - if (OptForSize) + const AttributeSet &FnAttrs = MF.getFunction()->getAttributes(); + if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize) || + FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::MinSize)) { return false; + } TM = &MF.getTarget(); TII = TM->getInstrInfo(); // Search through basic blocks and mark the ones that have early returns ReturnBBs.clear(); + VisitedBBs.clear(); findReturns(MF.begin()); bool MadeChange = false; - MachineBasicBlock::iterator ReturnLoc; MachineBasicBlock *MBB; unsigned int Cycles = 0; - unsigned int BBCycles; // Pad the identified basic blocks with NOOPs for (DenseMap::iterator I = ReturnBBs.begin(); @@ -100,8 +119,16 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) { Cycles = I->second; if (Cycles < Threshold) { - if (!cyclesUntilReturn(MBB, BBCycles, &ReturnLoc)) - continue; + // BB ends in a return. Skip over any DBG_VALUE instructions + // trailing the terminator. 
+ assert(MBB->size() > 0 && + "Basic block should contain at least a RET but is empty"); + MachineBasicBlock::iterator ReturnLoc = --MBB->end(); + + while (ReturnLoc->isDebugValue()) + --ReturnLoc; + assert(ReturnLoc->isReturn() && !ReturnLoc->isCall() && + "Basic block does not end with RET"); addPadding(MBB, ReturnLoc, Threshold - Cycles); NumBBsPadded++; @@ -127,18 +154,30 @@ void PadShortFunc::findReturns(MachineBasicBlock *MBB, unsigned int Cycles) { // Follow branches in BB and look for returns for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(); - I != MBB->succ_end(); ++I) { + I != MBB->succ_end(); ++I) { + if (*I == MBB) + continue; findReturns(*I, Cycles); } } -/// cyclesUntilReturn - if the MBB has a return instruction, set Location -/// to the instruction and return true. Return false otherwise. +/// cyclesUntilReturn - return true if the MBB has a return instruction, +/// and return false otherwise. /// Cycles will be incremented by the number of cycles taken to reach the /// return or the end of the BB, whichever occurs first. bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB, - unsigned int &Cycles, - MachineBasicBlock::iterator *Location) { + unsigned int &Cycles) { + // Return cached result if BB was previously visited + DenseMap::iterator it + = VisitedBBs.find(MBB); + if (it != VisitedBBs.end()) { + VisitedBBInfo BBInfo = it->second; + Cycles += BBInfo.Cycles; + return BBInfo.HasReturn; + } + + unsigned int CyclesToEnd = 0; + for (MachineBasicBlock::iterator MBBI = MBB->begin(); MBBI != MBB->end(); ++MBBI) { MachineInstr *MI = MBBI; @@ -146,14 +185,16 @@ bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB, // functions do not count because the called function will be padded, // if necessary. if (MI->isReturn() && !MI->isCall()) { - if (Location) - *Location = MBBI; + VisitedBBs[MBB] = VisitedBBInfo(true, CyclesToEnd); + Cycles += CyclesToEnd; return true; } - Cycles += TII->getInstrLatency(TM->getInstrItineraryData(), MI); + CyclesToEnd += TII->getInstrLatency(TM->getInstrItineraryData(), MI); } + VisitedBBs[MBB] = VisitedBBInfo(false, CyclesToEnd); + Cycles += CyclesToEnd; return false; } -- cgit v1.2.3-18-g5258 From 08219ea2b42e140aa03ceb8e166cd52d787c48aa Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 13 Jan 2013 11:37:04 +0000 Subject: X86: Add patterns for X86ISD::VSEXT in registers. Those can occur when something between the sextload and the store is on the same chain and blocks isel. Fixes PR14887. 
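[Editor's note: a C-level picture of the situation this fixes; an intervening use of the loaded value keeps the sign-extend from folding into the load, so instruction selection sees a register-to-register extend that previously had no pattern. Illustrative only, not derived from PR14887's actual test case.]

    #include <cstdint>

    int64_t extendAndStore(const int32_t *p, int32_t *q) {
      int32_t v = *p;  // load
      *q = v;          // store of the same value blocks sextload folding
      return v;        // leaves a sign-extend of a register
    }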
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172353 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 3175324b4c..23073a9387 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5590,6 +5590,30 @@ defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>; defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>; let Predicates = [HasAVX2] in { + def : Pat<(v16i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>; + def : Pat<(v8i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDYrr VR128:$src)>; + def : Pat<(v4i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQYrr VR128:$src)>; + + def : Pat<(v8i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>; + def : Pat<(v4i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQYrr VR128:$src)>; + + def : Pat<(v4i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>; + + def : Pat<(v16i16 (X86vsext (v32i8 VR256:$src))), + (VPMOVSXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v8i32 (X86vsext (v32i8 VR256:$src))), + (VPMOVSXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v4i64 (X86vsext (v32i8 VR256:$src))), + (VPMOVSXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + + def : Pat<(v8i32 (X86vsext (v16i16 VR256:$src))), + (VPMOVSXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v4i64 (X86vsext (v16i16 VR256:$src))), + (VPMOVSXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + + def : Pat<(v4i64 (X86vsext (v8i32 VR256:$src))), + (VPMOVSXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v8i32 (X86vsmovl (v8i16 (bitconvert (v2i64 (load addr:$src)))))), (VPMOVSXWDYrm addr:$src)>; def : Pat<(v4i64 (X86vsmovl (v4i32 (bitconvert (v2i64 (load addr:$src)))))), @@ -5628,6 +5652,15 @@ let Predicates = [HasAVX] in { } let Predicates = [UseSSE41] in { + def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>; + def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (PMOVSXBDrr VR128:$src)>; + def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (PMOVSXBQrr VR128:$src)>; + + def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>; + def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (PMOVSXWQrr VR128:$src)>; + + def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>; + // Common patterns involving scalar load def : Pat<(int_x86_sse41_pmovsxbq (bitconvert (v4i32 (X86vzmovl @@ -5727,6 +5760,15 @@ let Predicates = [HasAVX] in { def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))), (VPMOVZXDQrm addr:$src)>; + def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>; + def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDrr VR128:$src)>; + def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQrr VR128:$src)>; + + def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>; + def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQrr VR128:$src)>; + + def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>; + def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), (VPMOVSXWDrm addr:$src)>; -- cgit v1.2.3-18-g5258 From 50c023d4960ee492014391b6b84a259e1e02da82 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Sun, 13 Jan 2013 19:03:55 +0000 Subject: Fix typo in 
comment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172364 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelDAGToDAG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 935f9bd715..6f13186dfb 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -446,7 +446,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { if (OptLevel != CodeGenOpt::None && (N->getOpcode() == X86ISD::CALL || (N->getOpcode() == X86ISD::TC_RETURN && - // Only does this if load can be foled into TC_RETURN. + // Only does this if load can be folded into TC_RETURN. (Subtarget->is64Bit() || getTargetMachine().getRelocationModel() != Reloc::PIC_)))) { /// Also try moving call address load from outside callseq_start to just -- cgit v1.2.3-18-g5258 From 33160cf37637691de97b16ad1b67e251fc6355d4 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 14 Jan 2013 07:26:58 +0000 Subject: Create a single multiclass for SSE and AVX version of MOVL/MOVH. Prevents needing to specify everything twice. No functional change intended git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172378 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 47 ++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 23 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 23073a9387..b3d6066a8e 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1110,34 +1110,41 @@ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), // SSE 1 & 2 - Move Low packed FP Instructions //===----------------------------------------------------------------------===// -multiclass sse12_mov_hilo_packedopc, RegisterClass RC, - SDNode psnode, SDNode pdnode, string base_opc, - string asm_opr, InstrItinClass itin> { +multiclass sse12_mov_hilo_packed_baseopc, SDNode psnode, SDNode pdnode, + string base_opc, string asm_opr, + InstrItinClass itin> { def PSrm : PI, TB; def PDrm : PI, TB, OpSize; + } -let AddedComplexity = 20 in { - defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp", - "\t{$src2, $src1, $dst|$dst, $src1, $src2}", - IIC_SSE_MOV_LH>, VEX_4V; +multiclass sse12_mov_hilo_packedopc, SDNode psnode, SDNode pdnode, + string base_opc, InstrItinClass itin> { + defm V#NAME : sse12_mov_hilo_packed_base, VEX_4V; + +let Constraints = "$src1 = $dst" in + defm NAME : sse12_mov_hilo_packed_base; } -let Constraints = "$src1 = $dst", AddedComplexity = 20 in { - defm MOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp", - "\t{$src2, $dst|$dst, $src2}", - IIC_SSE_MOV_LH>; + +let AddedComplexity = 20 in { + defm MOVL : sse12_mov_hilo_packed<0x12, X86Movlps, X86Movlpd, "movlp", + IIC_SSE_MOV_LH>; } def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), @@ -1235,14 +1242,8 @@ let Predicates = [UseSSE2] in { //===----------------------------------------------------------------------===// let AddedComplexity = 20 in { - defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp", - "\t{$src2, $src1, $dst|$dst, $src1, $src2}", - IIC_SSE_MOV_LH>, VEX_4V; -} -let Constraints = "$src1 = $dst", AddedComplexity = 20 in { - defm MOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp", - "\t{$src2, $dst|$dst, $src2}", - IIC_SSE_MOV_LH>; + defm MOVH : 
sse12_mov_hilo_packed<0x16, X86Movlhps, X86Movlhpd, "movhp", + IIC_SSE_MOV_LH>; } // v2f64 extract element 1 is always custom lowered to unpack high to low -- cgit v1.2.3-18-g5258 From 29344a6349af5e37b1187de5d354cb95a5840e13 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 14 Jan 2013 07:46:34 +0000 Subject: Simplify nested strconcats in X86 td files since strconcat can take more than 2 arguments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172379 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrFMA.td | 14 ++++++------ lib/Target/X86/X86InstrSSE.td | 50 +++++++++++++++++++++---------------------- 2 files changed, 32 insertions(+), 32 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index f48f133b12..7759a8a2da 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -60,14 +60,14 @@ multiclass fma3p_forms opc132, bits<8> opc213, bits<8> opc231, PatFrag MemFrag128, PatFrag MemFrag256, SDNode Op, ValueType OpTy128, ValueType OpTy256> { defm r213 : fma3p_rm; let neverHasSideEffects = 1 in { defm r132 : fma3p_rm; defm r231 : fma3p_rm; } // neverHasSideEffects = 1 } @@ -160,15 +160,15 @@ multiclass fma3s_forms opc132, bits<8> opc213, bits<8> opc231, X86MemOperand x86memop, Operand memop, PatFrag mem_frag, ComplexPattern mem_cpat> { let neverHasSideEffects = 1 in { - defm r132 : fma3s_rm; - defm r231 : fma3s_rm; } -defm r213 : fma3s_rm, - fma3s_rm_int; } diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b3d6066a8e..89149c65bf 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3013,18 +3013,18 @@ multiclass sse1_fp_unop_s opc, string OpcodeStr, let Predicates = [HasAVX], hasSideEffects = 0 in { def V#NAME#SSr : SSI, VEX_4V, VEX_LIG; let mayLoad = 1 in { def V#NAME#SSm : SSI, VEX_4V, VEX_LIG; def V#NAME#SSm_Int : SSI, VEX_4V, VEX_LIG; } @@ -3055,18 +3055,18 @@ multiclass sse1_fp_unop_rw opc, string OpcodeStr, SDNode OpNode, let Predicates = [HasAVX], hasSideEffects = 0 in { def V#NAME#SSr : SSI, VEX_4V, VEX_LIG; let mayLoad = 1 in { def V#NAME#SSm : SSI, VEX_4V, VEX_LIG; def V#NAME#SSm_Int : SSI, VEX_4V, VEX_LIG; } @@ -3101,22 +3101,22 @@ multiclass sse1_fp_unop_p opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { let Predicates = [HasAVX] in { def V#NAME#PSr : PSI, VEX; def V#NAME#PSm : PSI, VEX; def V#NAME#PSYr : PSI, VEX, VEX_L; def V#NAME#PSYm : PSI, VEX, VEX_L; @@ -3136,23 +3136,23 @@ multiclass sse1_fp_unop_p_int opc, string OpcodeStr, OpndItins itins> { let Predicates = [HasAVX] in { def V#NAME#PSr_Int : PSI, VEX; def V#NAME#PSm_Int : PSI, VEX; def V#NAME#PSYr_Int : PSI, VEX, VEX_L; def V#NAME#PSYm_Int : PSI, VEX, VEX_L; @@ -3174,18 +3174,18 @@ multiclass sse2_fp_unop_s opc, string OpcodeStr, let Predicates = [HasAVX], hasSideEffects = 0 in { def V#NAME#SDr : SDI, VEX_4V, VEX_LIG; let mayLoad = 1 in { def V#NAME#SDm : SDI, VEX_4V, VEX_LIG; def V#NAME#SDm_Int : SDI, VEX_4V, VEX_LIG; } @@ -3212,22 +3212,22 @@ multiclass sse2_fp_unop_p opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { let Predicates = [HasAVX] in { def V#NAME#PDr : PDI, VEX; def V#NAME#PDm : PDI, VEX; def V#NAME#PDYr : PDI, VEX, VEX_L; def V#NAME#PDYm : PDI, VEX, VEX_L; @@ -3986,14 +3986,14 @@ multiclass sse2_pshuffle, VEX; def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, 
$src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), @@ -4003,14 +4003,14 @@ let Predicates = [HasAVX] in { let Predicates = [HasAVX2] in { def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, i8imm:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L; def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)), -- cgit v1.2.3-18-g5258 From dd2e8950222ab74157b1c083ffa77b0fbaf1d210 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Mon, 14 Jan 2013 22:31:35 +0000 Subject: [ms-inline asm] Extend support for parsing Intel bracketed memory operands that have an arbitrary ordering of the base register, index register and displacement. rdar://12527141 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172484 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 363 +++++++++++++++++++++++------- 1 file changed, 277 insertions(+), 86 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 5ce258ed0f..05bb1e37e5 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -684,115 +684,298 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) { return Size; } +enum IntelBracExprState { + IBES_START, + IBES_LBRAC, + IBES_RBRAC, + IBES_REGISTER, + IBES_REGISTER_STAR, + IBES_REGISTER_STAR_INTEGER, + IBES_INTEGER, + IBES_INTEGER_STAR, + IBES_INDEX_REGISTER, + IBES_IDENTIFIER, + IBES_DISP_EXPR, + IBES_MINUS, + IBES_ERROR +}; + +class IntelBracExprStateMachine { + IntelBracExprState State; + unsigned BaseReg, IndexReg, Scale; + int64_t Disp; + + unsigned TmpReg; + int64_t TmpInteger; + + bool isPlus; + +public: + IntelBracExprStateMachine(MCAsmParser &parser) : + State(IBES_START), BaseReg(0), IndexReg(0), Scale(1), Disp(0), + TmpReg(0), TmpInteger(0), isPlus(true) {} + + unsigned getBaseReg() { return BaseReg; } + unsigned getIndexReg() { return IndexReg; } + unsigned getScale() { return Scale; } + int64_t getDisp() { return Disp; } + bool isValidEndState() { return State == IBES_RBRAC; } + + void onPlus() { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_INTEGER: + State = IBES_START; + if (isPlus) + Disp += TmpInteger; + else + Disp -= TmpInteger; + break; + case IBES_REGISTER: + State = IBES_START; + // If we already have a BaseReg, then assume this is the IndexReg with a + // scale of 1. + if (!BaseReg) { + BaseReg = TmpReg; + } else { + assert (!IndexReg && "BaseReg/IndexReg already set!"); + IndexReg = TmpReg; + Scale = 1; + } + break; + case IBES_INDEX_REGISTER: + State = IBES_START; + break; + } + isPlus = true; + } + void onMinus() { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_START: + State = IBES_MINUS; + break; + case IBES_INTEGER: + State = IBES_START; + if (isPlus) + Disp += TmpInteger; + else + Disp -= TmpInteger; + break; + case IBES_REGISTER: + State = IBES_START; + // If we already have a BaseReg, then assume this is the IndexReg with a + // scale of 1. 
+ if (!BaseReg) { + BaseReg = TmpReg; + } else { + assert (!IndexReg && "BaseReg/IndexReg already set!"); + IndexReg = TmpReg; + Scale = 1; + } + break; + case IBES_INDEX_REGISTER: + State = IBES_START; + break; + } + isPlus = false; + } + void onRegister(unsigned Reg) { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_START: + State = IBES_REGISTER; + TmpReg = Reg; + break; + case IBES_INTEGER_STAR: + assert (!IndexReg && "IndexReg already set!"); + State = IBES_INDEX_REGISTER; + IndexReg = Reg; + Scale = TmpInteger; + break; + } + } + void onDispExpr() { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_START: + State = IBES_DISP_EXPR; + break; + } + } + void onInteger(int64_t TmpInt) { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_START: + State = IBES_INTEGER; + TmpInteger = TmpInt; + break; + case IBES_MINUS: + State = IBES_INTEGER; + TmpInteger = TmpInt; + break; + case IBES_REGISTER_STAR: + assert (!IndexReg && "IndexReg already set!"); + State = IBES_INDEX_REGISTER; + IndexReg = TmpReg; + Scale = TmpInt; + break; + } + } + void onStar() { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_INTEGER: + State = IBES_INTEGER_STAR; + break; + case IBES_REGISTER: + State = IBES_REGISTER_STAR; + break; + } + } + void onLBrac() { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_RBRAC: + State = IBES_START; + isPlus = true; + break; + } + } + void onRBrac() { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_DISP_EXPR: + State = IBES_RBRAC; + break; + case IBES_INTEGER: + State = IBES_RBRAC; + if (isPlus) + Disp += TmpInteger; + else + Disp -= TmpInteger; + break; + case IBES_REGISTER: + State = IBES_RBRAC; + // If we already have a BaseReg, then assume this is the IndexReg with a + // scale of 1. 
+ if (!BaseReg) { + BaseReg = TmpReg; + } else { + assert (!IndexReg && "BaseReg/IndexReg already set!"); + IndexReg = TmpReg; + Scale = 1; + } + break; + case IBES_INDEX_REGISTER: + State = IBES_RBRAC; + break; + } + } +}; + X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, unsigned Size) { - unsigned BaseReg = 0, IndexReg = 0, Scale = 1; const AsmToken &Tok = Parser.getTok(); SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc(); - const MCExpr *Disp = MCConstantExpr::Create(0, getContext()); - // Parse [ BaseReg + Scale*IndexReg + Disp ] or [ symbol ] - // Eat '[' if (getLexer().isNot(AsmToken::LBrac)) return ErrorOperand(Start, "Expected '[' token!"); Parser.Lex(); + unsigned TmpReg = 0; + + // Try to handle '[' 'symbol' ']' if (getLexer().is(AsmToken::Identifier)) { - // Parse BaseReg - if (ParseRegister(BaseReg, Start, End)) { - // Handle '[' 'symbol' ']' - if (getParser().ParseExpression(Disp, End)) return 0; + if (ParseRegister(TmpReg, Start, End)) { + const MCExpr *Disp; + if (getParser().ParseExpression(Disp, End)) + return 0; + if (getLexer().isNot(AsmToken::RBrac)) return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!"); End = Parser.getTok().getEndLoc(); Parser.Lex(); return X86Operand::CreateMem(Disp, Start, End, Size); } - } else if (getLexer().is(AsmToken::Integer)) { - int64_t Val = Tok.getIntVal(); - Parser.Lex(); - SMLoc Loc = Tok.getLoc(); - if (getLexer().is(AsmToken::RBrac)) { - // Handle '[' number ']' - End = Parser.getTok().getEndLoc(); - Parser.Lex(); - const MCExpr *Disp = MCConstantExpr::Create(Val, getContext()); - if (SegReg) - return X86Operand::CreateMem(SegReg, Disp, 0, 0, Scale, - Start, End, Size); - return X86Operand::CreateMem(Disp, Start, End, Size); - } else if (getLexer().is(AsmToken::Star)) { - // Handle '[' Scale*IndexReg ']' - Parser.Lex(); - SMLoc IdxRegLoc = Tok.getLoc(); - if (ParseRegister(IndexReg, IdxRegLoc, End)) - return ErrorOperand(IdxRegLoc, "Expected register"); - Scale = Val; - } else - return ErrorOperand(Loc, "Unexpected token"); } - // Parse ][ as a plus. - bool ExpectRBrac = true; - if (getLexer().is(AsmToken::RBrac)) { - ExpectRBrac = false; - End = Parser.getTok().getEndLoc(); - Parser.Lex(); - } + // Parse [ BaseReg + Scale*IndexReg + Disp ]. + bool Done = false; + IntelBracExprStateMachine SM(Parser); + + // If we parsed a register, then the end loc has already been set and + // the identifier has already been lexed. We also need to update the + // state. + if (TmpReg) + SM.onRegister(TmpReg); + + const MCExpr *Disp = 0; + while (!Done) { + bool UpdateLocLex = true; - if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus) || - getLexer().is(AsmToken::LBrac)) { - ExpectRBrac = true; - bool isPlus = getLexer().is(AsmToken::Plus) || - getLexer().is(AsmToken::LBrac); - Parser.Lex(); - SMLoc PlusLoc = Tok.getLoc(); - if (getLexer().is(AsmToken::Integer)) { + // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an + // identifier. Don't try an parse it as a register. + if (Tok.getString().startswith(".")) + break; + + switch (getLexer().getKind()) { + default: { + if (SM.isValidEndState()) { + Done = true; + break; + } + return ErrorOperand(Tok.getLoc(), "Unexpected token!"); + } + case AsmToken::Identifier: { + // This could be a register or a displacement expression. 
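// [Editor's note, not part of the patch: the state machine above
//  accepts base, index and displacement terms in any order, e.g.
//  "[eax + 4*ebx + 12]" drives START -> REGISTER(eax) -> START
//  (eax becomes BaseReg) -> INTEGER(4) -> INTEGER_STAR ->
//  INDEX_REGISTER(ebx, Scale=4) -> START -> INTEGER(12) -> RBRAC
//  (Disp += 12), and "][" re-opens as a '+' via onLBrac().]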
+ if(!ParseRegister(TmpReg, Start, End)) { + SM.onRegister(TmpReg); + UpdateLocLex = false; + break; + } else if (!getParser().ParseExpression(Disp, End)) { + SM.onDispExpr(); + UpdateLocLex = false; + break; + } + return ErrorOperand(Tok.getLoc(), "Unexpected identifier!"); + } + case AsmToken::Integer: { int64_t Val = Tok.getIntVal(); - Parser.Lex(); - if (getLexer().is(AsmToken::Star)) { - Parser.Lex(); - SMLoc IdxRegLoc = Tok.getLoc(); - if (ParseRegister(IndexReg, IdxRegLoc, End)) - return ErrorOperand(IdxRegLoc, "Expected register"); - Scale = Val; - } else if (getLexer().is(AsmToken::RBrac)) { - const MCExpr *ValExpr = MCConstantExpr::Create(Val, getContext()); - Disp = isPlus ? ValExpr : MCConstantExpr::Create(0-Val, getContext()); - } else - return ErrorOperand(PlusLoc, "unexpected token after +"); - } else if (getLexer().is(AsmToken::Identifier)) { - // This could be an index register or a displacement expression. - if (!IndexReg) - ParseRegister(IndexReg, Start, End); - else if (getParser().ParseExpression(Disp, End)) - return 0; + SM.onInteger(Val); + break; } - } - - // Parse ][ as a plus. - if (getLexer().is(AsmToken::RBrac)) { - ExpectRBrac = false; - End = Parser.getTok().getEndLoc(); - Parser.Lex(); - if (getLexer().is(AsmToken::LBrac)) { - ExpectRBrac = true; - Parser.Lex(); - if (getParser().ParseExpression(Disp, End)) - return 0; + case AsmToken::Plus: SM.onPlus(); break; + case AsmToken::Minus: SM.onMinus(); break; + case AsmToken::Star: SM.onStar(); break; + case AsmToken::LBrac: SM.onLBrac(); break; + case AsmToken::RBrac: SM.onRBrac(); break; + } + if (!Done && UpdateLocLex) { + End = Tok.getLoc(); + Parser.Lex(); // Consume the token. } - } else if (ExpectRBrac) { - if (getParser().ParseExpression(Disp, End)) - return 0; } - if (ExpectRBrac) { - if (getLexer().isNot(AsmToken::RBrac)) - return ErrorOperand(End, "expected ']' token!"); - End = Parser.getTok().getEndLoc(); - Parser.Lex(); - } + if (!Disp) + Disp = MCConstantExpr::Create(SM.getDisp(), getContext()); // Parse the dot operator (e.g., [ebx].foo.bar). if (Tok.getString().startswith(".")) { @@ -806,10 +989,18 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, Disp = NewDisp; } + int BaseReg = SM.getBaseReg(); + int IndexReg = SM.getIndexReg(); + // handle [-42] - if (!BaseReg && !IndexReg) - return X86Operand::CreateMem(Disp, Start, End, Size); + if (!BaseReg && !IndexReg) { + if (!SegReg) + return X86Operand::CreateMem(Disp, Start, End); + else + return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size); + } + int Scale = SM.getScale(); return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start, End, Size); } -- cgit v1.2.3-18-g5258 From d3c965d6251e6d939f7797f8704d4e3a82f7e274 Mon Sep 17 00:00:00 2001 From: Renato Golin Date: Wed, 16 Jan 2013 21:29:55 +0000 Subject: Change CostTable model to be global to all targets Moving the X86CostTable to a common place, so that other back-ends can share the code. Also simplifying it a bit and commoning up tables with one and two types on operations. 
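[Editor's note: this refactoring replaces the per-target FindInTable/FindInConvertTable helpers with shared tables keyed on an ISD opcode plus one or two value types. A minimal sketch of the lookup idea; the types are simplified, and the COST_NOT_FOUND sentinel mirrors the one visible in the diff below.]

    #include <cstddef>

    struct CostEntry { int ISD; int Types[2]; unsigned Cost; };
    const unsigned COST_NOT_FOUND = ~0u;

    unsigned findCost(const CostEntry *Tbl, size_t Len,
                      int ISD, int Ty0, int Ty1 = 0) {
      for (size_t I = 0; I != Len; ++I)
        if (Tbl[I].ISD == ISD && Tbl[I].Types[0] == Ty0 &&
            Tbl[I].Types[1] == Ty1)
          return Tbl[I].Cost;   // hit: the table supplies the cost
      return COST_NOT_FOUND;    // miss: caller falls back to the default model
    }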
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172658 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetTransformInfo.cpp | 168 ++++++++++++------------------ 1 file changed, 66 insertions(+), 102 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 675c896d70..a988cfef53 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -119,44 +119,6 @@ llvm::createX86TargetTransformInfoPass(const X86TargetMachine *TM) { // //===----------------------------------------------------------------------===// -namespace { -struct X86CostTblEntry { - int ISD; - MVT Type; - unsigned Cost; -}; -} - -static int -FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) { - for (unsigned int i = 0; i < len; ++i) - if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty) - return i; - - // Could not find an entry. - return -1; -} - -namespace { -struct X86TypeConversionCostTblEntry { - int ISD; - MVT Dst; - MVT Src; - unsigned Cost; -}; -} - -static int -FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len, - int ISD, MVT Dst, MVT Src) { - for (unsigned int i = 0; i < len; ++i) - if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst) - return i; - - // Could not find an entry. - return -1; -} - X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); // TODO: Currently the __builtin_popcount() implementation using SSE3 @@ -206,24 +168,24 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - static const X86CostTblEntry AVX1CostTable[] = { - // We don't have to scalarize unsupported ops. We can issue two half-sized - // operations and we only need to extract the upper YMM half. - // Two ops + 1 extract + 1 insert = 4. - { ISD::MUL, MVT::v8i32, 4 }, - { ISD::SUB, MVT::v8i32, 4 }, - { ISD::ADD, MVT::v8i32, 4 }, - { ISD::MUL, MVT::v4i64, 4 }, - { ISD::SUB, MVT::v4i64, 4 }, - { ISD::ADD, MVT::v4i64, 4 }, - }; + // We don't have to scalarize unsupported ops. We can issue two half-sized + // operations and we only need to extract the upper YMM half. + // Two ops + 1 extract + 1 insert = 4. + static const CostTableEntry AVX1CostTable[] = { + { ISD::MUL, { MVT::v8i32 }, 4 }, + { ISD::SUB, { MVT::v8i32 }, 4 }, + { ISD::ADD, { MVT::v8i32 }, 4 }, + { ISD::MUL, { MVT::v4i64 }, 4 }, + { ISD::SUB, { MVT::v4i64 }, 4 }, + { ISD::ADD, { MVT::v4i64 }, 4 }, + }; + UnaryCostTable costTable (AVX1CostTable, array_lengthof(AVX1CostTable)); // Look for AVX1 lowering tricks. if (ST->hasAVX()) { - int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD, - LT.second); - if (Idx != -1) - return LT.first * AVX1CostTable[Idx].Cost; + unsigned cost = costTable.findCost(ISD, LT.second); + if (cost != BinaryCostTable::COST_NOT_FOUND) + return LT.first * cost; } // Fallback to the default implementation. 
return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty); @@ -254,30 +216,29 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { if (!SrcTy.isSimple() || !DstTy.isSimple()) return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); - static const X86TypeConversionCostTblEntry AVXConversionTbl[] = { - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, - { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, - { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, - { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 }, - { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, - { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 }, - { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, - { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, + static const CostTableEntry AVXConversionTbl[] = { + { ISD::SIGN_EXTEND, { MVT::v8i32, MVT::v8i16 }, 1 }, + { ISD::ZERO_EXTEND, { MVT::v8i32, MVT::v8i16 }, 1 }, + { ISD::SIGN_EXTEND, { MVT::v4i64, MVT::v4i32 }, 1 }, + { ISD::ZERO_EXTEND, { MVT::v4i64, MVT::v4i32 }, 1 }, + { ISD::TRUNCATE, { MVT::v4i32, MVT::v4i64 }, 1 }, + { ISD::TRUNCATE, { MVT::v8i16, MVT::v8i32 }, 1 }, + { ISD::SINT_TO_FP, { MVT::v8f32, MVT::v8i8 }, 1 }, + { ISD::SINT_TO_FP, { MVT::v4f32, MVT::v4i8 }, 1 }, + { ISD::UINT_TO_FP, { MVT::v8f32, MVT::v8i8 }, 1 }, + { ISD::UINT_TO_FP, { MVT::v4f32, MVT::v4i8 }, 1 }, + { ISD::FP_TO_SINT, { MVT::v8i8, MVT::v8f32 }, 1 }, + { ISD::FP_TO_SINT, { MVT::v4i8, MVT::v4f32 }, 1 }, + { ISD::ZERO_EXTEND, { MVT::v8i32, MVT::v8i1 }, 6 }, + { ISD::SIGN_EXTEND, { MVT::v8i32, MVT::v8i1 }, 9 }, + { ISD::TRUNCATE, { MVT::v8i32, MVT::v8i64 }, 3 } }; + BinaryCostTable costTable (AVXConversionTbl, array_lengthof(AVXConversionTbl)); if (ST->hasAVX()) { - int Idx = FindInConvertTable(AVXConversionTbl, - array_lengthof(AVXConversionTbl), - ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); - if (Idx != -1) - return AVXConversionTbl[Idx].Cost; + unsigned cost = costTable.findCost(ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); + if (cost != BinaryCostTable::COST_NOT_FOUND) + return cost; } return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); @@ -293,48 +254,51 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - static const X86CostTblEntry SSE42CostTbl[] = { - { ISD::SETCC, MVT::v2f64, 1 }, - { ISD::SETCC, MVT::v4f32, 1 }, - { ISD::SETCC, MVT::v2i64, 1 }, - { ISD::SETCC, MVT::v4i32, 1 }, - { ISD::SETCC, MVT::v8i16, 1 }, - { ISD::SETCC, MVT::v16i8, 1 }, + static const CostTableEntry SSE42CostTbl[] = { + { ISD::SETCC, { MVT::v2f64 }, 1 }, + { ISD::SETCC, { MVT::v4f32 }, 1 }, + { ISD::SETCC, { MVT::v2i64 }, 1 }, + { ISD::SETCC, { MVT::v4i32 }, 1 }, + { ISD::SETCC, { MVT::v8i16 }, 1 }, + { ISD::SETCC, { MVT::v16i8 }, 1 }, }; + UnaryCostTable costTableSSE4 (SSE42CostTbl, array_lengthof(SSE42CostTbl)); - static const X86CostTblEntry AVX1CostTbl[] = { - { ISD::SETCC, MVT::v4f64, 1 }, - { ISD::SETCC, MVT::v8f32, 1 }, + static const CostTableEntry AVX1CostTbl[] = { + { ISD::SETCC, { MVT::v4f64 }, 1 }, + { ISD::SETCC, { MVT::v8f32 }, 1 }, // AVX1 does not support 8-wide integer compare. 
- { ISD::SETCC, MVT::v4i64, 4 },
- { ISD::SETCC, MVT::v8i32, 4 },
- { ISD::SETCC, MVT::v16i16, 4 },
- { ISD::SETCC, MVT::v32i8, 4 },
+ { ISD::SETCC, { MVT::v4i64 }, 4 },
+ { ISD::SETCC, { MVT::v8i32 }, 4 },
+ { ISD::SETCC, { MVT::v16i16 }, 4 },
+ { ISD::SETCC, { MVT::v32i8 }, 4 },
};
+ UnaryCostTable costTableAVX1 (AVX1CostTbl, array_lengthof(AVX1CostTbl));

- static const X86CostTblEntry AVX2CostTbl[] = {
- { ISD::SETCC, MVT::v4i64, 1 },
- { ISD::SETCC, MVT::v8i32, 1 },
- { ISD::SETCC, MVT::v16i16, 1 },
- { ISD::SETCC, MVT::v32i8, 1 },
+ static const CostTableEntry AVX2CostTbl[] = {
+ { ISD::SETCC, { MVT::v4i64 }, 1 },
+ { ISD::SETCC, { MVT::v8i32 }, 1 },
+ { ISD::SETCC, { MVT::v16i16 }, 1 },
+ { ISD::SETCC, { MVT::v32i8 }, 1 },
};
+ UnaryCostTable costTableAVX2 (AVX2CostTbl, array_lengthof(AVX2CostTbl));

if (ST->hasAVX2()) {
- int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy);
- if (Idx != -1)
- return LT.first * AVX2CostTbl[Idx].Cost;
+ unsigned cost = costTableAVX2.findCost(ISD, MTy);
+ if (cost != BinaryCostTable::COST_NOT_FOUND)
+ return LT.first * cost;
}

if (ST->hasAVX()) {
- int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy);
- if (Idx != -1)
- return LT.first * AVX1CostTbl[Idx].Cost;
+ unsigned cost = costTableAVX1.findCost(ISD, MTy);
+ if (cost != BinaryCostTable::COST_NOT_FOUND)
+ return LT.first * cost;
}

if (ST->hasSSE42()) {
- int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy);
- if (Idx != -1)
- return LT.first * SSE42CostTbl[Idx].Cost;
+ unsigned cost = costTableSSE4.findCost(ISD, MTy);
+ if (cost != BinaryCostTable::COST_NOT_FOUND)
+ return LT.first * cost;
}

return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
-- cgit v1.2.3-18-g5258


From 02c6325a4592fefebc837b677eaf87dc532ecb7c Mon Sep 17 00:00:00 2001
From: Jakob Stoklund Olesen
Date: Thu, 17 Jan 2013 00:58:38 +0000
Subject: Provide a place for targets to insert ILP optimization passes. Move the early if-conversion pass into this group.

ILP optimizations usually need to find the right balance between register pressure and ILP using the MachineTraceMetrics analysis to identify critical paths and estimate other costs. Such passes should run together so they can share dominator tree and loop info analyses.

Besides if-conversion, future passes to run here could include expression height reduction and ARM's MLxExpansion pass.
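[Editor's note: a compact model of the extension point this adds. The base pipeline calls addILPOpts() between instruction selection and register allocation, and a target overrides it to insert its ILP passes as one group. Names are simplified; only the addILPOpts() hook and the early if-converter come from the patch.]

    #include <string>
    #include <vector>

    struct PassConfig {
      std::vector<std::string> Passes;
      virtual ~PassConfig() {}
      virtual bool addILPOpts() { return false; }  // default: no ILP group
      void build() {
        Passes.push_back("isel");
        addILPOpts();                              // ILP passes slot in here
        Passes.push_back("regalloc");
      }
    };

    struct X86PassConfigToy : PassConfig {
      bool addILPOpts() override {
        Passes.push_back("early-if-converter");    // mirrors the X86 diff below
        return true;
      }
    };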
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172687 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetMachine.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 706e64a09f..799f140407 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -151,6 +151,7 @@ public: } virtual bool addInstSelector(); + virtual bool addILPOpts(); virtual bool addPreRegAlloc(); virtual bool addPostRegAlloc(); virtual bool addPreEmitPass(); @@ -158,12 +159,7 @@ public: } // namespace TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) { - X86PassConfig *PC = new X86PassConfig(this, PM); - - if (X86EarlyIfConv && Subtarget.hasCMov()) - PC->enablePass(&EarlyIfConverterID); - - return PC; + return new X86PassConfig(this, PM); } bool X86PassConfig::addInstSelector() { @@ -181,6 +177,14 @@ bool X86PassConfig::addInstSelector() { return false; } +bool X86PassConfig::addILPOpts() { + if (X86EarlyIfConv && getX86Subtarget().hasCMov()) { + addPass(&EarlyIfConverterID); + return true; + } + return false; +} + bool X86PassConfig::addPreRegAlloc() { return false; // -print-machineinstr shouldn't print after this. } -- cgit v1.2.3-18-g5258 From c2cbcc3acfc0e7426626331d57b35f1d6c7a4a47 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 17 Jan 2013 06:59:42 +0000 Subject: Combine AVX and SSE forms of MOVSS and MOVSD into the same multiclasses so they get instantiated together. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172704 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 124 +++++++++++++++++------------------------- 1 file changed, 50 insertions(+), 74 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 89149c65bf..18c3dfe8f2 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -436,93 +436,69 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // in terms of a copy, and just mentioned, we don't use movss/movsd for copies. //===----------------------------------------------------------------------===// -class sse12_move_rr : - SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm, - [(set VR128:$dst, (vt (OpNode VR128:$src1, - (scalar_to_vector RC:$src2))))], - IIC_SSE_MOV_S_RR>; +multiclass sse12_move_rr { + def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, RC:$src2), + !strconcat(base_opc, asm_opr), + [(set VR128:$dst, (vt (OpNode VR128:$src1, + (scalar_to_vector RC:$src2))))], + IIC_SSE_MOV_S_RR>; -// Loading from memory automatically zeroing upper bits. 
-class sse12_move_rm : - SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (mem_pat addr:$src))], - IIC_SSE_MOV_S_RM>; - -// AVX -def VMOVSSrr : sse12_move_rr, XS, VEX_4V, - VEX_LIG; -def VMOVSDrr : sse12_move_rr, XD, VEX_4V, - VEX_LIG; - -// For the disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { - def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), - (ins VR128:$src1, FR32:$src2), - "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], - IIC_SSE_MOV_S_RR>, - XS, VEX_4V, VEX_LIG; - def VMOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), - (ins VR128:$src1, FR64:$src2), - "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], - IIC_SSE_MOV_S_RR>, - XD, VEX_4V, VEX_LIG; + // For the disassembler + let isCodeGenOnly = 1, hasSideEffects = 0 in + def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src1, RC:$src2), + !strconcat(base_opc, asm_opr), + [], IIC_SSE_MOV_S_RR>; } -let canFoldAsLoad = 1, isReMaterializable = 1 in { - def VMOVSSrm : sse12_move_rm, XS, VEX, - VEX_LIG; - let AddedComplexity = 20 in - def VMOVSDrm : sse12_move_rm, XD, VEX, - VEX_LIG; -} +multiclass sse12_move { + // AVX + defm V#NAME : sse12_move_rr, + VEX_4V, VEX_LIG; -def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), - "movss\t{$src, $dst|$dst, $src}", - [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, - XS, VEX, VEX_LIG; -def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, - XD, VEX, VEX_LIG; + def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, + VEX, VEX_LIG; + // SSE1 & 2 + let Constraints = "$src1 = $dst" in { + defm NAME : sse12_move_rr; + } -// SSE1 & 2 -let Constraints = "$src1 = $dst" in { - def MOVSSrr : sse12_move_rr, XS; - def MOVSDrr : sse12_move_rr, XD; + def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>; +} - // For the disassembler - let isCodeGenOnly = 1, hasSideEffects = 0 in { - def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), - (ins VR128:$src1, FR32:$src2), - "movss\t{$src2, $dst|$dst, $src2}", [], - IIC_SSE_MOV_S_RR>, XS; - def MOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), - (ins VR128:$src1, FR64:$src2), - "movsd\t{$src2, $dst|$dst, $src2}", [], - IIC_SSE_MOV_S_RR>, XD; - } +// Loading from memory automatically zeroing upper bits. 
+multiclass sse12_move_rm { + def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set RC:$dst, (mem_pat addr:$src))], + IIC_SSE_MOV_S_RM>, VEX, VEX_LIG; + def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set RC:$dst, (mem_pat addr:$src))], + IIC_SSE_MOV_S_RM>; } +defm MOVSS : sse12_move, XS; +defm MOVSD : sse12_move, XD; + let canFoldAsLoad = 1, isReMaterializable = 1 in { - def MOVSSrm : sse12_move_rm, XS; + defm MOVSS : sse12_move_rm, XS; let AddedComplexity = 20 in - def MOVSDrm : sse12_move_rm, XD; + defm MOVSD : sse12_move_rm, XD; } -def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), - "movss\t{$src, $dst|$dst, $src}", - [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>; -def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>; - // Patterns let Predicates = [HasAVX] in { let AddedComplexity = 15 in { -- cgit v1.2.3-18-g5258 From 6c327f92a562d9d280bdbc3bde3c0ce269a4c65c Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Thu, 17 Jan 2013 09:59:53 +0000 Subject: Optimization for the following SIGN_EXTEND pairs: v8i8 -> v8i64, v8i8 -> v8i32, v4i8 -> v4i64, v4i16 -> v4i64 for AVX and AVX2. Bug 14865. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172708 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f42884dd2e..a8294b6de9 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16970,14 +16970,37 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + + if (!VT.isVector()) + return SDValue(); + + SDValue In = N->getOperand(0); + EVT InVT = In.getValueType(); + DebugLoc dl = N->getDebugLoc(); + unsigned ExtenedEltSize = VT.getVectorElementType().getSizeInBits(); + + // Split SIGN_EXTEND operation to use vmovsx instruction when possible + if (InVT == MVT::v8i8) { + if (ExtenedEltSize > 16 && !Subtarget->hasInt256()) + In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, In); + if (ExtenedEltSize > 32) + In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i32, In); + return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, In); + } + + if ((InVT == MVT::v4i8 || InVT == MVT::v4i16) && + ExtenedEltSize > 32 && !Subtarget->hasInt256()) { + In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, In); + return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, In); + } if (!DCI.isBeforeLegalizeOps()) return SDValue(); if (!Subtarget->hasFp256()) return SDValue(); - EVT VT = N->getValueType(0); - if (VT.isVector() && VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget); if (R.getNode()) return R; -- cgit v1.2.3-18-g5258 From 505bca3617fe310a5ff07914e3cf3ea6ae4d27ed Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 17 Jan 2013 19:21:48 +0000 Subject: [ms-inline asm] Add support for the 'SIZE' and 'LENGTH' operators. 
Part of rdar://12576868 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172743 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 82 +++++++++++++++++-------------- 1 file changed, 46 insertions(+), 36 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 05bb1e37e5..38665f07e1 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -57,7 +57,7 @@ private: X86Operand *ParseATTOperand(); X86Operand *ParseIntelOperand(); X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc); - X86Operand *ParseIntelTypeOperator(SMLoc StartLoc); + X86Operand *ParseIntelOperator(SMLoc StartLoc, unsigned OpKind); X86Operand *ParseIntelMemOperand(unsigned SegReg, SMLoc StartLoc); X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size); X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); @@ -1043,11 +1043,11 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { // FIXME: The SemaLookup will fail if the name is anything other then an // identifier. // FIXME: Pass a valid SMLoc. - unsigned tSize; - SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tSize, - IsVarDecl); + unsigned tLength, tSize, tType; + SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, + tSize, tType, IsVarDecl); if (!Size) - Size = tSize; + Size = tType * 8; // Size is in terms of bits in this context. NeedSizeDir = Size > 0; } } @@ -1148,10 +1148,19 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) { OffsetOfLoc); } -/// Parse the 'TYPE' operator. The TYPE operator returns the size of a C or -/// C++ type or variable. If the variable is an array, TYPE returns the size of -/// a single element of the array. -X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) { +enum IntelOperatorKind { + IOK_LENGTH, + IOK_SIZE, + IOK_TYPE +}; + +/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator +/// returns the number of elements in an array. It returns the value 1 for +/// non-array variables. The SIZE operator returns the size of a C or C++ +/// variable. A variable's size is the product of its LENGTH and TYPE. The +/// TYPE operator returns the size of a C or C++ type or variable. If the +/// variable is an array, TYPE returns the size of a single element. +X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) { SMLoc TypeLoc = Start; Parser.Lex(); // Eat offset. Start = Parser.getTok().getLoc(); @@ -1162,50 +1171,51 @@ X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) { if (getParser().ParseExpression(Val, End)) return 0; - unsigned Size = 0; + unsigned Length = 0, Size = 0, Type = 0; if (const MCSymbolRefExpr *SymRef = dyn_cast(Val)) { const MCSymbol &Sym = SymRef->getSymbol(); // FIXME: The SemaLookup will fail if the name is anything other then an // identifier. // FIXME: Pass a valid SMLoc. bool IsVarDecl; - if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size, - IsVarDecl)) + if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length, + Size, Type, IsVarDecl)) return ErrorOperand(Start, "Unable to lookup TYPE of expr!"); - - Size /= 8; // Size is in terms of bits, but we want bytes in the context. 
+ } + unsigned CVal; + switch(OpKind) { + default: llvm_unreachable("Unexpected operand kind!"); + case IOK_LENGTH: CVal = Length; break; + case IOK_SIZE: CVal = Size; break; + case IOK_TYPE: CVal = Type; break; } // Rewrite the type operator and the C or C++ type or variable in terms of an // immediate. E.g. TYPE foo -> $$4 unsigned Len = End.getPointer() - TypeLoc.getPointer(); - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, Size)); + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal)); - const MCExpr *Imm = MCConstantExpr::Create(Size, getContext()); + const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext()); return X86Operand::CreateImm(Imm, Start, End, /*NeedAsmRewrite*/false); } X86Operand *X86AsmParser::ParseIntelOperand() { SMLoc Start = Parser.getTok().getLoc(), End; - - // offset operator. StringRef AsmTokStr = Parser.getTok().getString(); - if ((AsmTokStr == "offset" || AsmTokStr == "OFFSET") && - isParsingInlineAsm()) - return ParseIntelOffsetOfOperator(Start); - - // Type directive. - if ((AsmTokStr == "type" || AsmTokStr == "TYPE") && - isParsingInlineAsm()) - return ParseIntelTypeOperator(Start); - - // Unsupported directives. - if (isParsingIntelSyntax() && - (AsmTokStr == "size" || AsmTokStr == "SIZE" || - AsmTokStr == "length" || AsmTokStr == "LENGTH")) - return ErrorOperand(Start, "Unsupported directive!"); - - // immediate. + + // Offset, length, type and size operators. + if (isParsingInlineAsm()) { + if (AsmTokStr == "offset" || AsmTokStr == "OFFSET") + return ParseIntelOffsetOfOperator(Start); + if (AsmTokStr == "length" || AsmTokStr == "LENGTH") + return ParseIntelOperator(Start, IOK_LENGTH); + if (AsmTokStr == "size" || AsmTokStr == "SIZE") + return ParseIntelOperator(Start, IOK_SIZE); + if (AsmTokStr == "type" || AsmTokStr == "TYPE") + return ParseIntelOperator(Start, IOK_TYPE); + } + + // Immediate. if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) || getLexer().is(AsmToken::Minus)) { const MCExpr *Val; @@ -1214,7 +1224,7 @@ X86Operand *X86AsmParser::ParseIntelOperand() { } } - // register + // Register. unsigned RegNo = 0; if (!ParseRegister(RegNo, Start, End)) { // If this is a segment register followed by a ':', then this is the start @@ -1226,7 +1236,7 @@ X86Operand *X86AsmParser::ParseIntelOperand() { return ParseIntelMemOperand(RegNo, Start); } - // mem operand + // Memory operand. return ParseIntelMemOperand(0, Start); } -- cgit v1.2.3-18-g5258 From 3da67ca97383f8d305cc732019a51157f9fce290 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 18 Jan 2013 00:50:59 +0000 Subject: [ms-inline asm] Make the error message more generic now that we support the 'SIZE' and 'LENGTH' operators. 
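To make the operator semantics above concrete, here is a small MS-style
inline-asm example of what the parser now accepts (hypothetical snippet;
assumes a compiler with MS inline-asm support, e.g. clang with -fasm-blocks,
and a 4-byte int):

    int arr[10]; // LENGTH arr = 10, TYPE arr = 4, SIZE arr = 40

    void demo() {
      __asm mov eax, LENGTH arr // number of elements: 10
      __asm mov ebx, TYPE arr   // size of one element: sizeof(int) = 4
      __asm mov ecx, SIZE arr   // LENGTH * TYPE = 40
    }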
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172773 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 38665f07e1..dc15a11537 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1180,7 +1180,7 @@ X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) { bool IsVarDecl; if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length, Size, Type, IsVarDecl)) - return ErrorOperand(Start, "Unable to lookup TYPE of expr!"); + return ErrorOperand(Start, "Unable to lookup expr!"); } unsigned CVal; switch(OpKind) { -- cgit v1.2.3-18-g5258 From 5a529e4f86bc3c76ba086662d7c4ef2d1f85ce6f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 18 Jan 2013 06:44:29 +0000 Subject: Make more use of is128BitVector/is256BitVector in place of getSizeInBits() == 128/256. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172792 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 66 ++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 35 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a8294b6de9..a1b2b2ae72 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3361,8 +3361,8 @@ static bool isPSHUFLWMask(ArrayRef Mask, EVT VT, bool HasInt256) { /// is suitable for input to PALIGNR. static bool isPALIGNRMask(ArrayRef Mask, EVT VT, const X86Subtarget *Subtarget) { - if ((VT.getSizeInBits() == 128 && !Subtarget->hasSSSE3()) || - (VT.getSizeInBits() == 256 && !Subtarget->hasInt256())) + if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) || + (VT.is256BitVector() && !Subtarget->hasInt256())) return false; unsigned NumElts = VT.getVectorNumElements(); @@ -3451,7 +3451,7 @@ static void CommuteVectorShuffleMask(SmallVectorImpl &Mask, /// reverse of what x86 shuffles want. static bool isSHUFPMask(ArrayRef Mask, EVT VT, bool HasFp256, bool Commuted = false) { - if (!HasFp256 && VT.getSizeInBits() == 256) + if (!HasFp256 && VT.is256BitVector()) return false; unsigned NumElems = VT.getVectorNumElements(); @@ -3636,7 +3636,7 @@ static bool isUNPCKLMask(ArrayRef Mask, EVT VT, assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); - if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && + if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 && (!HasInt256 || (NumElts != 16 && NumElts != 32))) return false; @@ -3675,7 +3675,7 @@ static bool isUNPCKHMask(ArrayRef Mask, EVT VT, assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); - if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && + if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 && (!HasInt256 || (NumElts != 16 && NumElts != 32))) return false; @@ -3706,14 +3706,14 @@ static bool isUNPCKHMask(ArrayRef Mask, EVT VT, /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. 
vector_shuffle v, undef, /// <0, 0, 1, 1> -static bool isUNPCKL_v_undef_Mask(ArrayRef Mask, EVT VT, - bool HasInt256) { +static bool isUNPCKL_v_undef_Mask(ArrayRef Mask, EVT VT, bool HasInt256) { unsigned NumElts = VT.getVectorNumElements(); + bool Is256BitVec = VT.is256BitVector(); assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); - if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && + if (Is256BitVec && NumElts != 4 && NumElts != 8 && (!HasInt256 || (NumElts != 16 && NumElts != 32))) return false; @@ -3721,7 +3721,7 @@ static bool isUNPCKL_v_undef_Mask(ArrayRef Mask, EVT VT, // FIXME: Need a better way to get rid of this, there's no latency difference // between UNPCKLPD and MOVDDUP, the later should always be checked first and // the former later. We should also remove the "_undef" special mask. - if (NumElts == 4 && VT.getSizeInBits() == 256) + if (NumElts == 4 && Is256BitVec) return false; // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate @@ -3755,7 +3755,7 @@ static bool isUNPCKH_v_undef_Mask(ArrayRef Mask, EVT VT, bool HasInt256) { assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); - if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && + if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 && (!HasInt256 || (NumElts != 16 && NumElts != 32))) return false; @@ -3871,7 +3871,7 @@ static bool isVPERMILPMask(ArrayRef Mask, EVT VT, bool HasFp256) { unsigned NumElts = VT.getVectorNumElements(); // Only match 256-bit with 32/64-bit types - if (VT.getSizeInBits() != 256 || (NumElts != 4 && NumElts != 8)) + if (!VT.is256BitVector() || (NumElts != 4 && NumElts != 8)) return false; unsigned NumLanes = VT.getSizeInBits()/128; @@ -3927,8 +3927,8 @@ static bool isMOVSHDUPMask(ArrayRef Mask, EVT VT, unsigned NumElems = VT.getVectorNumElements(); - if ((VT.getSizeInBits() == 128 && NumElems != 4) || - (VT.getSizeInBits() == 256 && NumElems != 8)) + if ((VT.is128BitVector() && NumElems != 4) || + (VT.is256BitVector() && NumElems != 8)) return false; // "i+1" is the value the indexed mask element must have @@ -3950,8 +3950,8 @@ static bool isMOVSLDUPMask(ArrayRef Mask, EVT VT, unsigned NumElems = VT.getVectorNumElements(); - if ((VT.getSizeInBits() == 128 && NumElems != 4) || - (VT.getSizeInBits() == 256 && NumElems != 8)) + if ((VT.is128BitVector() && NumElems != 4) || + (VT.is256BitVector() && NumElems != 8)) return false; // "i" is the value the indexed mask element must have @@ -4358,12 +4358,11 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) { static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); - unsigned Size = VT.getSizeInBits(); // Always build SSE zero vectors as <4 x i32> bitcasted // to their dest type. This ensures they get CSE'd. 
SDValue Vec; - if (Size == 128) { // SSE + if (VT.is128BitVector()) { // SSE if (Subtarget->hasSSE2()) { // SSE2 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); @@ -4371,7 +4370,7 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32); Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst); } - } else if (Size == 256) { // AVX + } else if (VT.is256BitVector()) { // AVX if (Subtarget->hasInt256()) { // AVX2 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; @@ -4396,11 +4395,10 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, static SDValue getOnesVector(EVT VT, bool HasInt256, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); - unsigned Size = VT.getSizeInBits(); SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); SDValue Vec; - if (Size == 256) { + if (VT.is256BitVector()) { if (HasInt256) { // AVX2 SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8); @@ -4408,7 +4406,7 @@ static SDValue getOnesVector(EVT VT, bool HasInt256, SelectionDAG &DAG, Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl); } - } else if (Size == 128) { + } else if (VT.is128BitVector()) { Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); } else llvm_unreachable("Unexpected vector type"); @@ -4487,14 +4485,13 @@ static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) { static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) { EVT VT = V.getValueType(); DebugLoc dl = V.getDebugLoc(); - unsigned Size = VT.getSizeInBits(); - if (Size == 128) { + if (VT.is128BitVector()) { V = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V); int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo }; V = DAG.getVectorShuffle(MVT::v4f32, dl, V, DAG.getUNDEF(MVT::v4f32), &SplatMask[0]); - } else if (Size == 256) { + } else if (VT.is256BitVector()) { // To use VPERMILPS to splat scalars, the second half of indicies must // refer to the higher part, which is a duplication of the lower one, // because VPERMILPS can only handle in-lane permutations. @@ -4518,14 +4515,14 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) { int EltNo = SV->getSplatIndex(); int NumElems = SrcVT.getVectorNumElements(); - unsigned Size = SrcVT.getSizeInBits(); + bool Is256BitVec = SrcVT.is256BitVector(); - assert(((Size == 128 && NumElems > 4) || Size == 256) && - "Unknown how to promote splat for type"); + assert(((SrcVT.is128BitVector() && NumElems > 4) || Is256BitVec) && + "Unknown how to promote splat for type"); // Extract the 128-bit part containing the splat element and update // the splat element index when it refers to the higher register. - if (Size == 256) { + if (Is256BitVec) { V1 = Extract128BitVector(V1, EltNo, DAG, dl); if (EltNo >= NumElems/2) EltNo -= NumElems/2; @@ -4542,7 +4539,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) { // Recreate the 256-bit vector and place the same 128-bit vector // into the low and high part. 
This is necessary because we want // to use VPERM* to shuffle the vectors - if (Size == 256) { + if (Is256BitVec) { V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT, V1, V1); } @@ -6672,7 +6669,6 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { // Handle splat operations if (SVOp->isSplat()) { unsigned NumElem = VT.getVectorNumElements(); - int Size = VT.getSizeInBits(); // Use vbroadcast whenever the splat comes from a foldable load SDValue Broadcast = LowerVectorBroadcast(Op, DAG); @@ -6680,8 +6676,8 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { return Broadcast; // Handle splats by matching through known shuffle masks - if ((Size == 128 && NumElem <= 4) || - (Size == 256 && NumElem <= 8)) + if ((VT.is128BitVector() && NumElem <= 4) || + (VT.is256BitVector() && NumElem <= 8)) return SDValue(); // All remaning splats are promoted to target supported vector shuffles. @@ -15970,7 +15966,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { EVT VT = N->getValueType(0); - if (VT.getSizeInBits() != 256) + if (!VT.is256BitVector()) return SDValue(); assert((N->getOpcode() == ISD::ANY_EXTEND || @@ -15979,7 +15975,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, SDValue Narrow = N->getOperand(0); EVT NarrowVT = Narrow->getValueType(0); - if (NarrowVT.getSizeInBits() != 128) + if (!NarrowVT.is128BitVector()) return SDValue(); if (Narrow->getOpcode() != ISD::XOR && @@ -17075,7 +17071,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, } } - if (VT.isVector() && VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget); if (R.getNode()) return R; -- cgit v1.2.3-18-g5258 From 0a38861364c30c73b64ae8d8ef326f655d22fefd Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 18 Jan 2013 06:50:59 +0000 Subject: Spelling fix: extened->extended. Trailing whitespace in same function. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172793 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index a1b2b2ae72..5999b155ae 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16967,29 +16967,30 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const X86Subtarget *Subtarget) {
   EVT VT = N->getValueType(0);
-
+
   if (!VT.isVector())
     return SDValue();

   SDValue In = N->getOperand(0);
   EVT InVT = In.getValueType();
   DebugLoc dl = N->getDebugLoc();
-  unsigned ExtenedEltSize = VT.getVectorElementType().getSizeInBits();
+  unsigned ExtendedEltSize = VT.getVectorElementType().getSizeInBits();

   // Split SIGN_EXTEND operation to use vmovsx instruction when possible
   if (InVT == MVT::v8i8) {
-    if (ExtenedEltSize > 16 && !Subtarget->hasInt256())
+    if (ExtendedEltSize > 16 && !Subtarget->hasInt256())
       In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, In);
-    if (ExtenedEltSize > 32)
+    if (ExtendedEltSize > 32)
       In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i32, In);
     return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, In);
   }

   if ((InVT == MVT::v4i8 || InVT == MVT::v4i16) &&
-      ExtenedEltSize > 32 && !Subtarget->hasInt256()) {
+      ExtendedEltSize > 32 && !Subtarget->hasInt256()) {
     In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, In);
     return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, In);
   }
+
   if (!DCI.isBeforeLegalizeOps())
     return SDValue();
-- cgit v1.2.3-18-g5258


From e6d8fa7d0b4352902886930debe459b9f477303e Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 18 Jan 2013 07:27:20 +0000
Subject: Minor formatting fix. No functional change.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172795 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 5999b155ae..4d982a4021 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5854,7 +5854,7 @@ LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget,
       int EltIdx = MaskVals[i] * 2;
       int Idx0 = (TwoInputs && (EltIdx >= 16)) ? 0x80 : EltIdx;
       int Idx1 = (TwoInputs && (EltIdx >= 16)) ? 0x80 : EltIdx+1;
-      pshufbMask.push_back(DAG.getConstant(Idx0, MVT::i8));
+      pshufbMask.push_back(DAG.getConstant(Idx0, MVT::i8));
       pshufbMask.push_back(DAG.getConstant(Idx1, MVT::i8));
     }
     V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V1);
-- cgit v1.2.3-18-g5258


From 5141d97d3ee9afca936bc870e67c53e1ed05f790 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 18 Jan 2013 08:41:28 +0000
Subject: Calculate vector element size more directly for VINSERTF128/VEXTRACTF128 immediate handling. Also use MVT since this is only called on legal types during pattern matching.
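Background on the MVT/EVT distinction driving this and the surrounding
commits: EVT can also describe "extended" IR types that have no machine
representation, so every query on an EVT carries extra branches, while MVT is
a plain enum restricted to legal machine types. A hedged sketch of the
conversion pattern used throughout these diffs (standard LLVM API of this era,
simplified):

    // Sketch: once a type is known legal (e.g. during pattern matching),
    // convert the EVT to its underlying MVT and query that instead.
    EVT VT = N->getValueType(0);
    assert(VT.isSimple() && "Pattern matching should only see legal types");
    MVT SimpleVT = VT.getSimpleVT();
    // MVT queries avoid the extended-type handling that EVT must support.
    unsigned EltBits = SimpleVT.getVectorElementType().getSizeInBits();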
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172797 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4d982a4021..2b6ff3602a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4011,9 +4011,8 @@ bool X86::isVEXTRACTF128Index(SDNode *N) { uint64_t Index = cast(N->getOperand(1).getNode())->getZExtValue(); - unsigned VL = N->getValueType(0).getVectorNumElements(); - unsigned VBits = N->getValueType(0).getSizeInBits(); - unsigned ElSize = VBits / VL; + MVT VT = N->getValueType(0).getSimpleVT(); + unsigned ElSize = VT.getVectorElementType().getSizeInBits(); bool Result = (Index * ElSize) % 128 == 0; return Result; @@ -4030,9 +4029,8 @@ bool X86::isVINSERTF128Index(SDNode *N) { uint64_t Index = cast(N->getOperand(2).getNode())->getZExtValue(); - unsigned VL = N->getValueType(0).getVectorNumElements(); - unsigned VBits = N->getValueType(0).getSizeInBits(); - unsigned ElSize = VBits / VL; + MVT VT = N->getValueType(0).getSimpleVT(); + unsigned ElSize = VT.getVectorElementType().getSizeInBits(); bool Result = (Index * ElSize) % 128 == 0; return Result; -- cgit v1.2.3-18-g5258 From 48177ac90fb940833b9deea1a6716092348cfe82 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 18 Jan 2013 23:10:30 +0000 Subject: On Sandybridge loading unaligned 256bits using two XMM loads (vmovups and vinsertf128) is faster than using a single vmovups instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172868 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2b6ff3602a..73a1d2e007 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16340,8 +16340,39 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, EVT MemVT = Ld->getMemoryVT(); DebugLoc dl = Ld->getDebugLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned RegSz = RegVT.getSizeInBits(); ISD::LoadExtType Ext = Ld->getExtensionType(); + unsigned Alignment = Ld->getAlignment(); + + // On Sandybridge unaligned 256bit loads are inefficient. 
+ if (RegVT.is256BitVector() && !Subtarget->hasInt256() && + !DCI.isBeforeLegalizeOps() && Alignment < 32 && + Ext == ISD::NON_EXTLOAD) { + unsigned NumElems = RegVT.getVectorNumElements(); + SDValue Ptr = Ld->getBasePtr(); + SDValue Increment = DAG.getConstant(16, TLI.getPointerTy()); + + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), + NumElems/2); + SDValue Load1 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, + Ld->getPointerInfo(), Ld->isVolatile(), + Ld->isNonTemporal(), Ld->isInvariant(), + Alignment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, + Ld->getPointerInfo(), Ld->isVolatile(), + Ld->isNonTemporal(), Ld->isInvariant(), + Alignment); + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Load1.getValue(1), + Load2.getValue(1)); + + SDValue NewVec = DAG.getUNDEF(RegVT); + NewVec = Insert128BitVector(NewVec, Load1, 0, DAG, dl); + NewVec = Insert128BitVector(NewVec, Load2, NumElems/2, DAG, dl); + return DCI.CombineTo(N, NewVec, TF, true); + } // If this is a vector EXT Load then attempt to optimize it using a // shuffle. If SSSE3 is not available we may emit an illegal shuffle but the @@ -16356,7 +16387,6 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, assert(MemVT.isVector() && "Must load a vector from memory"); unsigned NumElems = RegVT.getVectorNumElements(); - unsigned RegSz = RegVT.getSizeInBits(); unsigned MemSz = MemVT.getSizeInBits(); assert(RegSz > MemSz && "Register size must be greater than the mem size"); -- cgit v1.2.3-18-g5258 From cfcab21e4d0e4d7444b147898d6aed1348df3043 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 19 Jan 2013 08:27:45 +0000 Subject: Use MVT instead of EVT when computing shuffle immediates since they can only be for legal types. Keeps compiler from generating unneeded checks and handling for extended types. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172893 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 73a1d2e007..b6b10e2dca 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3837,7 +3837,7 @@ static bool isVPERM2X128Mask(ArrayRef Mask, EVT VT, bool HasFp256) { /// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle /// the specified VECTOR_MASK mask with VPERM2F128/VPERM2I128 instructions. static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) { - EVT VT = SVOp->getValueType(0); + MVT VT = SVOp->getValueType(0).getSimpleVT(); unsigned HalfSize = VT.getVectorNumElements()/2; @@ -4040,7 +4040,7 @@ bool X86::isVINSERTF128Index(SDNode *N) { /// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions. /// Handles 128-bit and 256-bit. static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) { - EVT VT = N->getValueType(0); + MVT VT = N->getValueType(0).getSimpleVT(); assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for PSHUF/SHUFP"); @@ -4070,7 +4070,7 @@ static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) { /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle /// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction. 
static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) { - EVT VT = N->getValueType(0); + MVT VT = N->getValueType(0).getSimpleVT(); assert((VT == MVT::v8i16 || VT == MVT::v16i16) && "Unsupported vector type for PSHUFHW"); @@ -4094,7 +4094,7 @@ static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) { /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle /// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction. static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) { - EVT VT = N->getValueType(0); + MVT VT = N->getValueType(0).getSimpleVT(); assert((VT == MVT::v8i16 || VT == MVT::v16i16) && "Unsupported vector type for PSHUFHW"); @@ -4118,7 +4118,7 @@ static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) { /// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction. static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) { - EVT VT = SVOp->getValueType(0); + MVT VT = SVOp->getValueType(0).getSimpleVT(); unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3; unsigned NumElts = VT.getVectorNumElements(); @@ -4149,8 +4149,8 @@ unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) { uint64_t Index = cast(N->getOperand(1).getNode())->getZExtValue(); - EVT VecVT = N->getOperand(0).getValueType(); - EVT ElVT = VecVT.getVectorElementType(); + MVT VecVT = N->getOperand(0).getValueType().getSimpleVT(); + MVT ElVT = VecVT.getVectorElementType(); unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits(); return Index / NumElemsPerChunk; @@ -4166,8 +4166,8 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) { uint64_t Index = cast(N->getOperand(2).getNode())->getZExtValue(); - EVT VecVT = N->getValueType(0); - EVT ElVT = VecVT.getVectorElementType(); + MVT VecVT = N->getValueType(0).getSimpleVT(); + MVT ElVT = VecVT.getVectorElementType(); unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits(); return Index / NumElemsPerChunk; @@ -4177,7 +4177,7 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) { /// the specified VECTOR_SHUFFLE mask with VPERMQ and VPERMPD instructions. /// Handles 256-bit. static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) { - EVT VT = N->getValueType(0); + MVT VT = N->getValueType(0).getSimpleVT(); unsigned NumElts = VT.getVectorNumElements(); -- cgit v1.2.3-18-g5258 From ba9586544164e69754039a25cb0ef7907d27382d Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sat, 19 Jan 2013 08:38:41 +0000 Subject: On Sandybridge split unaligned 256bit stores into two xmm-sized stores. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172894 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b6b10e2dca..ca8cd741e7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16344,12 +16344,15 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, ISD::LoadExtType Ext = Ld->getExtensionType(); unsigned Alignment = Ld->getAlignment(); + bool IsAligned = Alignment == 0 || Alignment == MemVT.getSizeInBits()/8; // On Sandybridge unaligned 256bit loads are inefficient. 
if (RegVT.is256BitVector() && !Subtarget->hasInt256() && - !DCI.isBeforeLegalizeOps() && Alignment < 32 && - Ext == ISD::NON_EXTLOAD) { + !DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) { unsigned NumElems = RegVT.getVectorNumElements(); + if (NumElems < 2) + return SDValue(); + SDValue Ptr = Ld->getBasePtr(); SDValue Increment = DAG.getConstant(16, TLI.getPointerTy()); @@ -16363,7 +16366,7 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(), - Alignment); + std::max(Alignment/2U, 1U)); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Load1.getValue(1), Load2.getValue(1)); @@ -16536,16 +16539,21 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, DebugLoc dl = St->getDebugLoc(); SDValue StoredVal = St->getOperand(1); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned Alignment = St->getAlignment(); + bool IsAligned = Alignment == 0 || Alignment == VT.getSizeInBits()/8; // If we are saving a concatenation of two XMM registers, perform two stores. // On Sandy Bridge, 256-bit memory operations are executed by two // 128-bit ports. However, on Haswell it is better to issue a single 256-bit // memory operation. if (VT.is256BitVector() && !Subtarget->hasInt256() && - StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS && - StoredVal.getNumOperands() == 2) { - SDValue Value0 = StoredVal.getOperand(0); - SDValue Value1 = StoredVal.getOperand(1); + StVT == VT && !IsAligned) { + unsigned NumElems = VT.getVectorNumElements(); + if (NumElems < 2) + return SDValue(); + + SDValue Value0 = Extract128BitVector(StoredVal, 0, DAG, dl); + SDValue Value1 = Extract128BitVector(StoredVal, NumElems/2, DAG, dl); SDValue Stride = DAG.getConstant(16, TLI.getPointerTy()); SDValue Ptr0 = St->getBasePtr(); @@ -16553,10 +16561,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue Ch0 = DAG.getStore(St->getChain(), dl, Value0, Ptr0, St->getPointerInfo(), St->isVolatile(), - St->isNonTemporal(), St->getAlignment()); + St->isNonTemporal(), Alignment); SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1, St->getPointerInfo(), St->isVolatile(), - St->isNonTemporal(), St->getAlignment()); + St->isNonTemporal(), + std::max(Alignment/2U, 1U)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1); } -- cgit v1.2.3-18-g5258 From 00a312c478771941bc3e98cfbe6728465c769807 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 19 Jan 2013 23:14:09 +0000 Subject: Capitalize LowerVectorIntExtend to be consistent with all the other lower functions in this file. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172927 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 4 ++-- lib/Target/X86/X86ISelLowering.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ca8cd741e7..6dbdd4e365 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6580,7 +6580,7 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { // Reduce a vector shuffle to zext. SDValue -X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const { +X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const { // PMOVZX is only available from SSE41. 
if (!Subtarget->hasSSE41()) return SDValue(); @@ -6683,7 +6683,7 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { } // Check integer expanding shuffles. - SDValue NewOp = lowerVectorIntExtend(Op, DAG); + SDValue NewOp = LowerVectorIntExtend(Op, DAG); if (NewOp.getNode()) return NewOp; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 35b5abd791..dc5caaec3e 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -852,7 +852,7 @@ namespace llvm { SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, -- cgit v1.2.3-18-g5258 From 657a99c608c98bb0cad655681c1da35ddd7b1418 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 19 Jan 2013 23:36:09 +0000 Subject: Use MVT instead of EVT in more of the shuffle lowering code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172930 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6dbdd4e365..f7b40f5208 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3586,7 +3586,7 @@ static bool isMOVLHPSMask(ArrayRef Mask, EVT VT) { static SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { - EVT VT = SVOp->getValueType(0); + MVT VT = SVOp->getValueType(0).getSimpleVT(); DebugLoc dl = SVOp->getDebugLoc(); if (VT != MVT::v8i32 && VT != MVT::v8f32) @@ -4207,7 +4207,7 @@ bool X86::isZeroNode(SDValue Elt) { /// their permute mask. 
static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { - EVT VT = SVOp->getValueType(0); + MVT VT = SVOp->getValueType(0).getSimpleVT(); unsigned NumElems = VT.getVectorNumElements(); SmallVector MaskVec; @@ -5656,8 +5656,8 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp, SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); - EVT VT = SVOp->getValueType(0); - EVT EltVT = VT.getVectorElementType(); + MVT VT = SVOp->getValueType(0).getSimpleVT(); + MVT EltVT = VT.getVectorElementType(); unsigned NumElems = VT.getVectorNumElements(); if (!Subtarget->hasSSE41() || EltVT == MVT::i8) @@ -6088,7 +6088,7 @@ static SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - EVT VT = SVOp->getValueType(0); + MVT VT = SVOp->getValueType(0).getSimpleVT(); SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); @@ -6214,14 +6214,14 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { if (NewOp.getNode()) return NewOp; - EVT VT = SVOp->getValueType(0); + MVT VT = SVOp->getValueType(0).getSimpleVT(); unsigned NumElems = VT.getVectorNumElements(); unsigned NumLaneElems = NumElems / 2; DebugLoc dl = SVOp->getDebugLoc(); - MVT EltVT = VT.getVectorElementType().getSimpleVT(); - EVT NVT = MVT::getVectorVT(EltVT, NumLaneElems); + MVT EltVT = VT.getVectorElementType(); + MVT NVT = MVT::getVectorVT(EltVT, NumLaneElems); SDValue Output[2]; SmallVector Mask; @@ -6326,7 +6326,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); - EVT VT = SVOp->getValueType(0); + MVT VT = SVOp->getValueType(0).getSimpleVT(); assert(VT.is128BitVector() && "Unsupported vector size"); @@ -6656,7 +6656,7 @@ X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *SVOp = cast(Op); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); DebugLoc dl = Op.getDebugLoc(); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); @@ -6701,7 +6701,7 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { if (ISD::isBuildVectorAllZeros(V2.getNode())) { SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); if (NewOp.getNode()) { - EVT NewVT = NewOp.getValueType(); + MVT NewVT = NewOp.getValueType().getSimpleVT(); if (isCommutedMOVLMask(cast(NewOp)->getMask(), NewVT, true, false)) return getVZextMovL(VT, NewVT, NewOp.getOperand(0), @@ -6710,7 +6710,7 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { } else if (ISD::isBuildVectorAllZeros(V1.getNode())) { SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); if (NewOp.getNode()) { - EVT NewVT = NewOp.getValueType(); + MVT NewVT = NewOp.getValueType().getSimpleVT(); if (isMOVLMask(cast(NewOp)->getMask(), NewVT)) return getVZextMovL(VT, NewVT, NewOp.getOperand(1), DAG, Subtarget, dl); @@ -6725,7 +6725,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *SVOp = cast(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); DebugLoc dl = Op.getDebugLoc(); unsigned NumElems = VT.getVectorNumElements(); bool V1IsUndef = 
V1.getOpcode() == ISD::UNDEF; @@ -6816,7 +6816,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (isShift && ShVal.hasOneUse()) { // If the shifted value has multiple uses, it may be cheaper to use // v_set0 + movlhps or movhlps, etc. - EVT EltVT = VT.getVectorElementType(); + MVT EltVT = VT.getVectorElementType(); ShAmt *= EltVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } @@ -6855,7 +6855,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (isShift) { // No better options. Use a vshldq / vsrldq. - EVT EltVT = VT.getVectorElementType(); + MVT EltVT = VT.getVectorElementType(); ShAmt *= EltVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } -- cgit v1.2.3-18-g5258 From 45e1c758338b0b2747d76d1e47bdce7c4d75dd56 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 20 Jan 2013 00:38:18 +0000 Subject: Use MVT instead of EVT in more instruction lowering code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172933 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f7b40f5208..e30d25ae0c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4390,7 +4390,7 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, /// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with /// no AVX2 supprt, use two <4 x i32> inserted in a <8 x i32> appropriately. /// Then bitcast to their original type, ensuring they get CSE'd. -static SDValue getOnesVector(EVT VT, bool HasInt256, SelectionDAG &DAG, +static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); @@ -5100,7 +5100,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const { if (!Subtarget->hasFp256()) return SDValue(); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); DebugLoc dl = Op.getDebugLoc(); assert((VT.is128BitVector() || VT.is256BitVector()) && @@ -5298,8 +5298,8 @@ SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - EVT ExtVT = VT.getVectorElementType(); + MVT VT = Op.getValueType().getSimpleVT(); + MVT ExtVT = VT.getVectorElementType(); unsigned NumElems = Op.getNumOperands(); // Vectors containing all zeros can be matched by pxor and xorps later @@ -5630,7 +5630,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // to create 256-bit vectors from two other 128-bit ones. 
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); - EVT ResVT = Op.getValueType(); + MVT ResVT = Op.getValueType().getSimpleVT(); assert(ResVT.is256BitVector() && "Value type must be 256-bit wide"); @@ -7038,10 +7038,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); DebugLoc dl = Op.getDebugLoc(); - if (!Op.getOperand(0).getValueType().is128BitVector()) + if (!Op.getOperand(0).getValueType().getSimpleVT().is128BitVector()) return SDValue(); if (VT.getSizeInBits() == 8) { @@ -7106,7 +7106,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, return SDValue(); SDValue Vec = Op.getOperand(0); - EVT VecVT = Vec.getValueType(); + MVT VecVT = Vec.getValueType().getSimpleVT(); // If this is a 256-bit vector result, first extract the 128-bit vector and // then extract the element from the 128-bit vector. @@ -7133,7 +7133,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, return Res; } - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); DebugLoc dl = Op.getDebugLoc(); // TODO: handle v16i8. if (VT.getSizeInBits() == 16) { @@ -7146,7 +7146,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, MVT::v4i32, Vec), Op.getOperand(1))); // Transform it so it match pextrw which produces a 32-bit result. - EVT EltVT = MVT::i32; + MVT EltVT = MVT::i32; SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT, Op.getOperand(0), Op.getOperand(1)); SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract, @@ -7161,7 +7161,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // SHUFPS the element to the lowest double word, then movss. int Mask[4] = { static_cast(Idx), -1, -1, -1 }; - EVT VVT = Op.getOperand(0).getValueType(); + MVT VVT = Op.getOperand(0).getValueType().getSimpleVT(); SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, @@ -7180,7 +7180,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // Note if the lower 64 bits of the result of the UNPCKHPD is then stored // to a f64mem, the whole operation is folded into a single MOVHPDmr. 
int Mask[2] = { 1, -1 }; - EVT VVT = Op.getOperand(0).getValueType(); + MVT VVT = Op.getOperand(0).getValueType().getSimpleVT(); SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, @@ -7193,8 +7193,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - EVT EltVT = VT.getVectorElementType(); + MVT VT = Op.getValueType().getSimpleVT(); + MVT EltVT = VT.getVectorElementType(); DebugLoc dl = Op.getDebugLoc(); SDValue N0 = Op.getOperand(0); @@ -7247,8 +7247,8 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - EVT EltVT = VT.getVectorElementType(); + MVT VT = Op.getValueType().getSimpleVT(); + MVT EltVT = VT.getVectorElementType(); DebugLoc dl = Op.getDebugLoc(); SDValue N0 = Op.getOperand(0); @@ -7296,7 +7296,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); - EVT OpVT = Op.getValueType(); + MVT OpVT = Op.getValueType().getSimpleVT(); // If this is a 256-bit vector result, first insert into a 128-bit // vector and then insert into the 256-bit vector. -- cgit v1.2.3-18-g5258 From 3b2aba09e2534a23ab6c50d9f60d1d7d9ff59eb0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 20 Jan 2013 00:43:42 +0000 Subject: Remove DebugLoc argument from static function. It can easily be obtained from the SVOp passed in. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172935 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index e30d25ae0c..59a25ff9a3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6135,8 +6135,9 @@ SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp, /// vector_shuffle X, Y, <2, 3, | 10, 11, | 0, 1, | 14, 15> static SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, - SelectionDAG &DAG, DebugLoc dl) { + SelectionDAG &DAG) { MVT VT = SVOp->getValueType(0).getSimpleVT(); + DebugLoc dl = SVOp->getDebugLoc(); unsigned NumElems = VT.getVectorNumElements(); MVT NewVT; unsigned Scale; @@ -6691,7 +6692,7 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { // do it! if (VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v16i16 || VT == MVT::v32i8) { - SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG); if (NewOp.getNode()) return DAG.getNode(ISD::BITCAST, dl, VT, NewOp); } else if ((VT == MVT::v4i32 || @@ -6699,7 +6700,7 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { // FIXME: Figure out a cleaner way to do this. // Try to make use of movq to zero out the top part. 
if (ISD::isBuildVectorAllZeros(V2.getNode())) { - SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG); if (NewOp.getNode()) { MVT NewVT = NewOp.getValueType().getSimpleVT(); if (isCommutedMOVLMask(cast(NewOp)->getMask(), @@ -6708,7 +6709,7 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { DAG, Subtarget, dl); } } else if (ISD::isBuildVectorAllZeros(V1.getNode())) { - SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG); if (NewOp.getNode()) { MVT NewVT = NewOp.getValueType().getSimpleVT(); if (isMOVLMask(cast(NewOp)->getMask(), NewVT)) -- cgit v1.2.3-18-g5258 From f84b7500ce489d2e4039348ed30bf584f0b61973 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 20 Jan 2013 00:50:58 +0000 Subject: Make some helper methods static. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172936 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 10 +++------- lib/Target/X86/X86ISelLowering.h | 2 -- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 59a25ff9a3..ec9f6751e1 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6173,7 +6173,7 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, /// getVZextMovL - Return a zero-extending vector move low node. /// -static SDValue getVZextMovL(EVT VT, EVT OpVT, +static SDValue getVZextMovL(MVT VT, EVT OpVT, SDValue SrcOp, SelectionDAG &DAG, const X86Subtarget *Subtarget, DebugLoc dl) { if (VT == MVT::v2f64 || VT == MVT::v4f32) { @@ -7036,9 +7036,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -SDValue -X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, - SelectionDAG &DAG) const { +static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getValueType().getSimpleVT(); DebugLoc dl = Op.getDebugLoc(); @@ -7191,9 +7189,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, return SDValue(); } -SDValue -X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, - SelectionDAG &DAG) const { +static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getValueType().getSimpleVT(); MVT EltVT = VT.getVectorElementType(); DebugLoc dl = Op.getDebugLoc(); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index dc5caaec3e..c1f940adf4 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -794,9 +794,7 @@ namespace llvm { SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, -- cgit v1.2.3-18-g5258 From 0c8607ba6a21578996a7532b9390afba13bd2087 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sun, 20 Jan 2013 08:35:56 +0000 Subject: Revert 172708. 
The optimization handles esoteric cases but adds a lot of complexity both to the X86 backend and to other backends.

This optimization disables an important canonicalization of chains of SEXT nodes and makes SEXT and ZEXT asymmetrical. Disabling the canonicalization of consecutive SEXT nodes into a single node disables other DAG optimizations that assume that there is only one SEXT node. The AVX mask optimizations are one example. Additionally, this optimization does not update the cost model.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172968 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 28 ++--------------------------
 1 file changed, 2 insertions(+), 26 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ec9f6751e1..c2b209d1b0 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -17000,38 +17000,14 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
 static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
 TargetLowering::DAGCombinerInfo &DCI,
 const X86Subtarget *Subtarget) {
- EVT VT = N->getValueType(0);
-
- if (!VT.isVector())
- return SDValue();
-
- SDValue In = N->getOperand(0);
- EVT InVT = In.getValueType();
- DebugLoc dl = N->getDebugLoc();
- unsigned ExtendedEltSize = VT.getVectorElementType().getSizeInBits();
-
- // Split SIGN_EXTEND operation to use vmovsx instruction when possible
- if (InVT == MVT::v8i8) {
- if (ExtendedEltSize > 16 && !Subtarget->hasInt256())
- In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, In);
- if (ExtendedEltSize > 32)
- In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i32, In);
- return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, In);
- }
-
- if ((InVT == MVT::v4i8 || InVT == MVT::v4i16) &&
- ExtendedEltSize > 32 && !Subtarget->hasInt256()) {
- In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, In);
- return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, In);
- }
-
 if (!DCI.isBeforeLegalizeOps())
 return SDValue();

 if (!Subtarget->hasFp256())
 return SDValue();

- if (VT.is256BitVector()) {
+ EVT VT = N->getValueType(0);
+ if (VT.isVector() && VT.getSizeInBits() == 256) {
 SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
 if (R.getNode())
 return R;
-- cgit v1.2.3-18-g5258

From 26827f3dc5fc2e614fb2409e5371de6132eaa8a7 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sun, 20 Jan 2013 09:02:22 +0000
Subject: Make LowerVSETCC a static function and use MVT instead of EVT.
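The EVT-to-MVT changes in this stretch of the series all follow one pattern. A minimal sketch of why the narrowing pays off (illustrative only: pickRegClass is a hypothetical helper, not code from any of these patches, and the 3.2-era type APIs are assumed):

    // EVT can describe arbitrary extended value types; MVT is a plain enum
    // of machine types. Once lowering knows the type is legal, it can
    // narrow once and switch on the enum directly.
    static const TargetRegisterClass *pickRegClass(EVT VT) {
      assert(VT.isSimple() && "expected a legal, simple value type");
      MVT SimpleVT = VT.getSimpleVT(); // the narrowing these patches hoist
      switch (SimpleVT.SimpleTy) {
      case MVT::i32: return &X86::GR32RegClass;
      case MVT::i64: return &X86::GR64RegClass;
      default:       return 0;
      }
    }

Taking MVT parameters instead moves the getSimpleVT() call, and the legality assumption it encodes, out to the caller, which is the shape of most of the commits here.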
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172969 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 127 +++++++++++++++++++------------------ lib/Target/X86/X86ISelLowering.h | 1 - 2 files changed, 65 insertions(+), 63 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c2b209d1b0..8f6005ff61 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9136,65 +9136,10 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, return SDValue(); } -SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { - - if (Op.getValueType().isVector()) return LowerVSETCC(Op, DAG); - - assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); - SDValue Op0 = Op.getOperand(0); - SDValue Op1 = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); - ISD::CondCode CC = cast(Op.getOperand(2))->get(); - - // Optimize to BT if possible. - // Lower (X & (1 << N)) == 0 to BT(X, N). - // Lower ((X >>u N) & 1) != 0 to BT(X, N). - // Lower ((X >>s N) & 1) != 0 to BT(X, N). - if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && - Op1.getOpcode() == ISD::Constant && - cast(Op1)->isNullValue() && - (CC == ISD::SETEQ || CC == ISD::SETNE)) { - SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG); - if (NewSetCC.getNode()) - return NewSetCC; - } - - // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of - // these. - if (Op1.getOpcode() == ISD::Constant && - (cast(Op1)->getZExtValue() == 1 || - cast(Op1)->isNullValue()) && - (CC == ISD::SETEQ || CC == ISD::SETNE)) { - - // If the input is a setcc, then reuse the input setcc or use a new one with - // the inverted condition. - if (Op0.getOpcode() == X86ISD::SETCC) { - X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); - bool Invert = (CC == ISD::SETNE) ^ - cast(Op1)->isNullValue(); - if (!Invert) return Op0; - - CCode = X86::GetOppositeBranchCondition(CCode); - return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1)); - } - } - - bool isFP = Op1.getValueType().isFloatingPoint(); - unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); - if (X86CC == X86::COND_INVALID) - return SDValue(); - - SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG); - EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG); - return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(X86CC, MVT::i8), EFLAGS); -} - // Lower256IntVSETCC - Break a VSETCC 256-bit integer VSETCC into two new 128 // ones, and then concatenate the result back. 
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC && "Unsupported value type for operation"); @@ -9214,26 +9159,27 @@ static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) { SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, dl); // Issue the operation on the smaller types and concatenate the result back - MVT EltVT = VT.getVectorElementType().getSimpleVT(); - EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); + MVT EltVT = VT.getVectorElementType(); + MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1, CC), DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC)); } -SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { SDValue Cond; SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); ISD::CondCode SetCCOpcode = cast(CC)->get(); - bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); + bool isFP = Op.getOperand(1).getValueType().getSimpleVT().isFloatingPoint(); DebugLoc dl = Op.getDebugLoc(); if (isFP) { #ifndef NDEBUG - EVT EltVT = Op0.getValueType().getVectorElementType(); + MVT EltVT = Op0.getValueType().getVectorElementType().getSimpleVT(); assert(EltVT == MVT::f32 || EltVT == MVT::f64); #endif @@ -9374,6 +9320,63 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { return Result; } +SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { + + MVT VT = Op.getValueType().getSimpleVT(); + + if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG); + + assert(VT == MVT::i8 && "SetCC type must be 8-bit integer"); + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); + ISD::CondCode CC = cast(Op.getOperand(2))->get(); + + // Optimize to BT if possible. + // Lower (X & (1 << N)) == 0 to BT(X, N). + // Lower ((X >>u N) & 1) != 0 to BT(X, N). + // Lower ((X >>s N) & 1) != 0 to BT(X, N). + if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && + Op1.getOpcode() == ISD::Constant && + cast(Op1)->isNullValue() && + (CC == ISD::SETEQ || CC == ISD::SETNE)) { + SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG); + if (NewSetCC.getNode()) + return NewSetCC; + } + + // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of + // these. + if (Op1.getOpcode() == ISD::Constant && + (cast(Op1)->getZExtValue() == 1 || + cast(Op1)->isNullValue()) && + (CC == ISD::SETEQ || CC == ISD::SETNE)) { + + // If the input is a setcc, then reuse the input setcc or use a new one with + // the inverted condition. 
+ if (Op0.getOpcode() == X86ISD::SETCC) { + X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); + bool Invert = (CC == ISD::SETNE) ^ + cast(Op1)->isNullValue(); + if (!Invert) return Op0; + + CCode = X86::GetOppositeBranchCondition(CCode); + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1)); + } + } + + bool isFP = Op1.getValueType().getSimpleVT().isFloatingPoint(); + unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); + if (X86CC == X86::COND_INVALID) + return SDValue(); + + SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG); + EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG); + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(X86CC, MVT::i8), EFLAGS); +} + // isX86LogicalCmp - Return true if opcode is a X86 logical comparison. static bool isX86LogicalCmp(SDValue Op) { unsigned Opc = Op.getNode()->getOpcode(); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index c1f940adf4..68560167a3 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -822,7 +822,6 @@ namespace llvm { SDValue LowerToBT(SDValue And, ISD::CondCode CC, DebugLoc dl, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const; -- cgit v1.2.3-18-g5258 From 053a2119835ac6ca3484f1b496cabd43c37e4279 Mon Sep 17 00:00:00 2001 From: Renato Golin Date: Sun, 20 Jan 2013 20:57:20 +0000 Subject: Revert CostTable algorithm, will re-write git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172992 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetTransformInfo.cpp | 168 ++++++++++++++++++------------ 1 file changed, 102 insertions(+), 66 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index a988cfef53..675c896d70 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -119,6 +119,44 @@ llvm::createX86TargetTransformInfoPass(const X86TargetMachine *TM) { // //===----------------------------------------------------------------------===// +namespace { +struct X86CostTblEntry { + int ISD; + MVT Type; + unsigned Cost; +}; +} + +static int +FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) { + for (unsigned int i = 0; i < len; ++i) + if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty) + return i; + + // Could not find an entry. + return -1; +} + +namespace { +struct X86TypeConversionCostTblEntry { + int ISD; + MVT Dst; + MVT Src; + unsigned Cost; +}; +} + +static int +FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len, + int ISD, MVT Dst, MVT Src) { + for (unsigned int i = 0; i < len; ++i) + if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst) + return i; + + // Could not find an entry. + return -1; +} + X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); // TODO: Currently the __builtin_popcount() implementation using SSE3 @@ -168,24 +206,24 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - // We don't have to scalarize unsupported ops. 
We can issue two half-sized - // operations and we only need to extract the upper YMM half. - // Two ops + 1 extract + 1 insert = 4. - static const CostTableEntry AVX1CostTable[] = { - { ISD::MUL, { MVT::v8i32 }, 4 }, - { ISD::SUB, { MVT::v8i32 }, 4 }, - { ISD::ADD, { MVT::v8i32 }, 4 }, - { ISD::MUL, { MVT::v4i64 }, 4 }, - { ISD::SUB, { MVT::v4i64 }, 4 }, - { ISD::ADD, { MVT::v4i64 }, 4 }, - }; - UnaryCostTable costTable (AVX1CostTable, array_lengthof(AVX1CostTable)); + static const X86CostTblEntry AVX1CostTable[] = { + // We don't have to scalarize unsupported ops. We can issue two half-sized + // operations and we only need to extract the upper YMM half. + // Two ops + 1 extract + 1 insert = 4. + { ISD::MUL, MVT::v8i32, 4 }, + { ISD::SUB, MVT::v8i32, 4 }, + { ISD::ADD, MVT::v8i32, 4 }, + { ISD::MUL, MVT::v4i64, 4 }, + { ISD::SUB, MVT::v4i64, 4 }, + { ISD::ADD, MVT::v4i64, 4 }, + }; // Look for AVX1 lowering tricks. if (ST->hasAVX()) { - unsigned cost = costTable.findCost(ISD, LT.second); - if (cost != BinaryCostTable::COST_NOT_FOUND) - return LT.first * cost; + int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD, + LT.second); + if (Idx != -1) + return LT.first * AVX1CostTable[Idx].Cost; } // Fallback to the default implementation. return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty); @@ -216,29 +254,30 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { if (!SrcTy.isSimple() || !DstTy.isSimple()) return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); - static const CostTableEntry AVXConversionTbl[] = { - { ISD::SIGN_EXTEND, { MVT::v8i32, MVT::v8i16 }, 1 }, - { ISD::ZERO_EXTEND, { MVT::v8i32, MVT::v8i16 }, 1 }, - { ISD::SIGN_EXTEND, { MVT::v4i64, MVT::v4i32 }, 1 }, - { ISD::ZERO_EXTEND, { MVT::v4i64, MVT::v4i32 }, 1 }, - { ISD::TRUNCATE, { MVT::v4i32, MVT::v4i64 }, 1 }, - { ISD::TRUNCATE, { MVT::v8i16, MVT::v8i32 }, 1 }, - { ISD::SINT_TO_FP, { MVT::v8f32, MVT::v8i8 }, 1 }, - { ISD::SINT_TO_FP, { MVT::v4f32, MVT::v4i8 }, 1 }, - { ISD::UINT_TO_FP, { MVT::v8f32, MVT::v8i8 }, 1 }, - { ISD::UINT_TO_FP, { MVT::v4f32, MVT::v4i8 }, 1 }, - { ISD::FP_TO_SINT, { MVT::v8i8, MVT::v8f32 }, 1 }, - { ISD::FP_TO_SINT, { MVT::v4i8, MVT::v4f32 }, 1 }, - { ISD::ZERO_EXTEND, { MVT::v8i32, MVT::v8i1 }, 6 }, - { ISD::SIGN_EXTEND, { MVT::v8i32, MVT::v8i1 }, 9 }, - { ISD::TRUNCATE, { MVT::v8i32, MVT::v8i64 }, 3 } + static const X86TypeConversionCostTblEntry AVXConversionTbl[] = { + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, + { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, + { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 }, + { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, + { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, }; - BinaryCostTable costTable (AVXConversionTbl, array_lengthof(AVXConversionTbl)); if (ST->hasAVX()) { - unsigned cost = costTable.findCost(ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); - if (cost != BinaryCostTable::COST_NOT_FOUND) - return cost; + int Idx = FindInConvertTable(AVXConversionTbl, + array_lengthof(AVXConversionTbl), + 
ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); + if (Idx != -1) + return AVXConversionTbl[Idx].Cost; } return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); @@ -254,51 +293,48 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - static const CostTableEntry SSE42CostTbl[] = { - { ISD::SETCC, { MVT::v2f64 }, 1 }, - { ISD::SETCC, { MVT::v4f32 }, 1 }, - { ISD::SETCC, { MVT::v2i64 }, 1 }, - { ISD::SETCC, { MVT::v4i32 }, 1 }, - { ISD::SETCC, { MVT::v8i16 }, 1 }, - { ISD::SETCC, { MVT::v16i8 }, 1 }, + static const X86CostTblEntry SSE42CostTbl[] = { + { ISD::SETCC, MVT::v2f64, 1 }, + { ISD::SETCC, MVT::v4f32, 1 }, + { ISD::SETCC, MVT::v2i64, 1 }, + { ISD::SETCC, MVT::v4i32, 1 }, + { ISD::SETCC, MVT::v8i16, 1 }, + { ISD::SETCC, MVT::v16i8, 1 }, }; - UnaryCostTable costTableSSE4 (SSE42CostTbl, array_lengthof(SSE42CostTbl)); - static const CostTableEntry AVX1CostTbl[] = { - { ISD::SETCC, { MVT::v4f64 }, 1 }, - { ISD::SETCC, { MVT::v8f32 }, 1 }, + static const X86CostTblEntry AVX1CostTbl[] = { + { ISD::SETCC, MVT::v4f64, 1 }, + { ISD::SETCC, MVT::v8f32, 1 }, // AVX1 does not support 8-wide integer compare. - { ISD::SETCC, { MVT::v4i64 }, 4 }, - { ISD::SETCC, { MVT::v8i32 }, 4 }, - { ISD::SETCC, { MVT::v16i16 }, 4 }, - { ISD::SETCC, { MVT::v32i8 }, 4 }, + { ISD::SETCC, MVT::v4i64, 4 }, + { ISD::SETCC, MVT::v8i32, 4 }, + { ISD::SETCC, MVT::v16i16, 4 }, + { ISD::SETCC, MVT::v32i8, 4 }, }; - UnaryCostTable costTableAVX1 (AVX1CostTbl, array_lengthof(AVX1CostTbl)); - static const CostTableEntry AVX2CostTbl[] = { - { ISD::SETCC, { MVT::v4i64 }, 1 }, - { ISD::SETCC, { MVT::v8i32 }, 1 }, - { ISD::SETCC, { MVT::v16i16 }, 1 }, - { ISD::SETCC, { MVT::v32i8 }, 1 }, + static const X86CostTblEntry AVX2CostTbl[] = { + { ISD::SETCC, MVT::v4i64, 1 }, + { ISD::SETCC, MVT::v8i32, 1 }, + { ISD::SETCC, MVT::v16i16, 1 }, + { ISD::SETCC, MVT::v32i8, 1 }, }; - UnaryCostTable costTableAVX2 (AVX2CostTbl, array_lengthof(AVX2CostTbl)); if (ST->hasAVX2()) { - unsigned cost = costTableAVX2.findCost(ISD, MTy); - if (cost != BinaryCostTable::COST_NOT_FOUND) - return LT.first * cost; + int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy); + if (Idx != -1) + return LT.first * AVX2CostTbl[Idx].Cost; } if (ST->hasAVX()) { - unsigned cost = costTableAVX1.findCost(ISD, MTy); - if (cost != BinaryCostTable::COST_NOT_FOUND) - return LT.first * cost; + int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy); + if (Idx != -1) + return LT.first * AVX1CostTbl[Idx].Cost; } if (ST->hasSSE42()) { - unsigned cost = costTableSSE4.findCost(ISD, MTy); - if (cost != BinaryCostTable::COST_NOT_FOUND) - return LT.first * cost; + int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy); + if (Idx != -1) + return LT.first * SSE42CostTbl[Idx].Cost; } return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); -- cgit v1.2.3-18-g5258 From d713c0f7f1556f1ff74b3e953be5d35b614cc081 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 20 Jan 2013 21:34:37 +0000 Subject: Capitalize lowerTRUNCATE so that it matches the other lower functions in this file despite it not matching coding standards. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172994 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 4 ++-- lib/Target/X86/X86ISelLowering.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 8f6005ff61..6c810bb49a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -8379,7 +8379,7 @@ SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op, return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Lo, Hi); } -SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); EVT VT = Op.getValueType(); SDValue In = Op.getOperand(0); @@ -12024,7 +12024,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG); case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); - case ISD::TRUNCATE: return lowerTRUNCATE(Op, DAG); + case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG); case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG); case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, DAG); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 68560167a3..69aa980a7c 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -809,7 +809,7 @@ namespace llvm { SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const; -- cgit v1.2.3-18-g5258 From a080daf5c6d9f62d021efe7c6c608a71d591dc44 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 20 Jan 2013 21:50:27 +0000 Subject: Convert more EVT's to MVT's in the lowering methods. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172995 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 47 +++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 23 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6c810bb49a..9f7d4a62d0 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -8296,9 +8296,9 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) co static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget) { - EVT VT = Op->getValueType(0); + MVT VT = Op->getValueType(0).getSimpleVT(); SDValue In = Op->getOperand(0); - EVT InVT = In.getValueType(); + MVT InVT = In.getValueType().getSimpleVT(); DebugLoc dl = Op->getDebugLoc(); // Optimize vectors in AVX mode: @@ -8327,7 +8327,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef); SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? 
ZeroVec : Undef); - EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), + MVT HVT = MVT::getVectorVT(VT.getVectorElementType(), VT.getVectorNumElements()/2); OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo); @@ -8349,9 +8349,9 @@ SDValue X86TargetLowering::LowerANY_EXTEND(SDValue Op, SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); SDValue In = Op.getOperand(0); - EVT SVT = In.getValueType(); + MVT SVT = In.getValueType().getSimpleVT(); if (Subtarget->hasFp256()) { SDValue Res = LowerAVXExtend(Op, DAG, Subtarget); @@ -8381,9 +8381,9 @@ SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op, SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); SDValue In = Op.getOperand(0); - EVT SVT = In.getValueType(); + MVT SVT = In.getValueType().getSimpleVT(); if ((VT == MVT::v4i32) && (SVT == MVT::v4i64)) { // On AVX2, v4i64 -> v4i32 becomes VPERMD. @@ -8498,9 +8498,10 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const { - if (Op.getValueType().isVector()) { - if (Op.getValueType() == MVT::v8i16) - return DAG.getNode(ISD::TRUNCATE, Op.getDebugLoc(), Op.getValueType(), + MVT VT = Op.getValueType().getSimpleVT(); + if (VT.isVector()) { + if (VT == MVT::v8i16) + return DAG.getNode(ISD::TRUNCATE, Op.getDebugLoc(), VT, DAG.getNode(ISD::FP_TO_SINT, Op.getDebugLoc(), MVT::v8i32, Op.getOperand(0))); return SDValue(); @@ -8542,9 +8543,9 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); SDValue In = Op.getOperand(0); - EVT SVT = In.getValueType(); + MVT SVT = In.getValueType().getSimpleVT(); assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!"); @@ -8556,8 +8557,8 @@ SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op, SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const { LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - EVT EltVT = VT; + MVT VT = Op.getValueType().getSimpleVT(); + MVT EltVT = VT; unsigned NumElts = VT == MVT::f64 ? 2 : 4; if (VT.isVector()) { EltVT = VT.getVectorElementType(); @@ -8588,8 +8589,8 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - EVT EltVT = VT; + MVT VT = Op.getValueType().getSimpleVT(); + MVT EltVT = VT; unsigned NumElts = VT == MVT::f64 ? 2 : 4; if (VT.isVector()) { EltVT = VT.getVectorElementType(); @@ -8623,8 +8624,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); DebugLoc dl = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - EVT SrcVT = Op1.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); + MVT SrcVT = Op1.getValueType().getSimpleVT(); // If second operand is smaller, extend it first. 
if (SrcVT.bitsLT(VT)) { @@ -8694,7 +8695,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) { SDValue N0 = Op.getOperand(0); DebugLoc dl = Op.getDebugLoc(); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); // Lower ISD::FGETSIGN to (AND (X86ISD::FGETSIGNx86 ...) 1). SDValue xFGETSIGN = DAG.getNode(X86ISD::FGETSIGNx86, dl, VT, N0, @@ -9499,7 +9500,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cmp = Cond.getOperand(1); unsigned Opc = Cmp.getOpcode(); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); bool IllegalFPCMov = false; if (VT.isFloatingPoint() && !VT.isVector() && @@ -9610,9 +9611,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op->getValueType(0); + MVT VT = Op->getValueType(0).getSimpleVT(); SDValue In = Op->getOperand(0); - EVT InVT = In.getValueType(); + MVT InVT = In.getValueType().getSimpleVT(); DebugLoc dl = Op->getDebugLoc(); if ((VT != MVT::v4i64 || InVT != MVT::v4i32) && @@ -9646,7 +9647,7 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op, SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask2[0]); - EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), + MVT HalfVT = MVT::getVectorVT(VT.getScalarType(), VT.getVectorNumElements()/2); OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo); -- cgit v1.2.3-18-g5258 From b84b4236343727ab1cd9c1cb4e3e3a43fa69c6c2 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 21 Jan 2013 06:13:28 +0000 Subject: Make helper method static. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173005 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 5 ++--- lib/Target/X86/X86ISelLowering.h | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9f7d4a62d0..671f0b41d3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -8540,8 +8540,7 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, return FIST; } -SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op, - SelectionDAG &DAG) const { +static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) { DebugLoc DL = Op.getDebugLoc(); MVT VT = Op.getValueType().getSimpleVT(); SDValue In = Op.getOperand(0); @@ -12031,7 +12030,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG); - case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG); + case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); case ISD::FABS: return LowerFABS(Op, DAG); case ISD::FNEG: return LowerFNEG(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 69aa980a7c..6d5e8c2180 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -815,7 +815,6 @@ namespace llvm { SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerFP_EXTEND(SDValue 
Op, SelectionDAG &DAG) const; SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; -- cgit v1.2.3-18-g5258 From b99bafe36d0c63b9febc7c620cde3663f9f7792f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 21 Jan 2013 06:21:54 +0000 Subject: Fix some 80 column violations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173006 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 671f0b41d3..d222ba1032 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7508,8 +7508,7 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, - int64_t Offset, - SelectionDAG &DAG) const { + int64_t Offset, SelectionDAG &DAG) const { // Create the TargetGlobalAddress node, folding in the constant // offset if it is legal. unsigned char OpFlags = @@ -7729,7 +7728,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { case TLSModel::LocalExec: return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, Subtarget->is64Bit(), - getTargetMachine().getRelocationModel() == Reloc::PIC_); + getTargetMachine().getRelocationModel() == Reloc::PIC_); } llvm_unreachable("Unknown TLS model."); } @@ -8108,7 +8107,8 @@ SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op, SVT == MVT::v8i8 || SVT == MVT::v8i16) && "Custom UINT_TO_FP is not supported!"); - EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, SVT.getVectorNumElements()); + EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, + SVT.getVectorNumElements()); return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0)); } @@ -8201,8 +8201,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0)); } -std::pair X86TargetLowering:: -FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) const { +std::pair +X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, + bool IsSigned, bool IsReplace) const { DebugLoc DL = Op.getDebugLoc(); EVT DstTy = Op.getValueType(); @@ -8704,7 +8705,8 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) { // LowerVectorAllZeroTest - Check whether an OR'd tree is PTEST-able. // -SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op, + SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree."); if (!Subtarget->hasSSE41()) -- cgit v1.2.3-18-g5258 From 9b33ef7c67267d1444bf250a54053bd821b773a0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 21 Jan 2013 06:57:59 +0000 Subject: Remove trailing whitespace. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173008 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index d222ba1032..ad7e0306a1 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5668,25 +5668,25 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp, // Check the mask for BLEND and build the value. unsigned MaskValue = 0; // There are 2 lanes if (NumElems > 8), and 1 lane otherwise. - unsigned NumLanes = (NumElems-1)/8 + 1; + unsigned NumLanes = (NumElems-1)/8 + 1; unsigned NumElemsInLane = NumElems / NumLanes; // Blend for v16i16 should be symetric for the both lanes. for (unsigned i = 0; i < NumElemsInLane; ++i) { - int SndLaneEltIdx = (NumLanes == 2) ? + int SndLaneEltIdx = (NumLanes == 2) ? SVOp->getMaskElt(i + NumElemsInLane) : -1; int EltIdx = SVOp->getMaskElt(i); - if ((EltIdx == -1 || EltIdx == (int)i) && + if ((EltIdx == -1 || EltIdx == (int)i) && (SndLaneEltIdx == -1 || SndLaneEltIdx == (int)(i + NumElemsInLane))) continue; - if (((unsigned)EltIdx == (i + NumElems)) && - (SndLaneEltIdx == -1 || + if (((unsigned)EltIdx == (i + NumElems)) && + (SndLaneEltIdx == -1 || (unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane)) MaskValue |= (1<hasInt256())) { - BlendVT = EVT::getVectorVT(*DAG.getContext(), - EVT::getFloatingPointVT(EltVT.getSizeInBits()), + BlendVT = EVT::getVectorVT(*DAG.getContext(), + EVT::getFloatingPointVT(EltVT.getSizeInBits()), NumElems); V1 = DAG.getNode(ISD::BITCAST, dl, VT, V1); V2 = DAG.getNode(ISD::BITCAST, dl, VT, V2); } - + SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2, DAG.getConstant(MaskValue, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Ret); -- cgit v1.2.3-18-g5258 From bbf9d3e4936d21c0089d2fa51da7e10910a1fb36 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 21 Jan 2013 07:19:54 +0000 Subject: Use MVT instead of EVT in LowerVECTOR_SHUFFLEtoBlend. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173009 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ad7e0306a1..e627dd45af 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5692,17 +5692,16 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp, // Convert i32 vectors to floating point if it is not AVX2. // AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors. 
- EVT BlendVT = VT;
+ MVT BlendVT = VT;
 if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) {
- BlendVT = EVT::getVectorVT(*DAG.getContext(),
- EVT::getFloatingPointVT(EltVT.getSizeInBits()),
- NumElems);
+ BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()),
+ NumElems);
 V1 = DAG.getNode(ISD::BITCAST, dl, VT, V1);
 V2 = DAG.getNode(ISD::BITCAST, dl, VT, V2);
 }

- SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2,
- DAG.getConstant(MaskValue, MVT::i32));
+ SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2,
+ DAG.getConstant(MaskValue, MVT::i32));
 return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
 }
-- cgit v1.2.3-18-g5258

From 04f74a149d16ff92722c3c333ab36b130fd8cae7 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 21 Jan 2013 07:25:16 +0000
Subject: Use <0 checks in place of ==-1 because it results in simpler code.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173010 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index e627dd45af..ea514c6d6f 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5678,12 +5678,12 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
 SVOp->getMaskElt(i + NumElemsInLane) : -1;
 int EltIdx = SVOp->getMaskElt(i);

- if ((EltIdx == -1 || EltIdx == (int)i) &&
- (SndLaneEltIdx == -1 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
+ if ((EltIdx < 0 || EltIdx == (int)i) &&
+ (SndLaneEltIdx < 0 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
 continue;

 if (((unsigned)EltIdx == (i + NumElems)) &&
- (SndLaneEltIdx == -1 ||
+ (SndLaneEltIdx < 0 ||
 (unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane))
 MaskValue |= (1 << i);
-- cgit v1.2.3-18-g5258

Date: Tue, 22 Jan 2013 09:46:31 +0000
Subject: Make APFloat constructor require explicit semantics.

Previously we tried to infer the semantics from the bit width, with an added IsIEEE argument for the PPC/IEEE 128-bit case, which had a default value. This default value allowed bugs to creep in, where it was inappropriate.
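The shape of the change at a call site, as a minimal sketch (the sign-mask value is just an example, borrowed from the X86 constant-pool lowering in the diff below):

    // Before: the semantics were inferred from the APInt's bit width,
    // which is ambiguous at 128 bits (PPC double-double vs. IEEE quad).
    //   APFloat SignMask(APInt(64, 1ULL << 63));
    // After: the caller names the semantics explicitly.
    APFloat SignMask(APFloat::IEEEdouble, APInt(64, 1ULL << 63));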
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173138 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 48 ++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 18 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ea514c6d6f..b740897636 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -8010,9 +8010,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SmallVector CV1; CV1.push_back( - ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL)))); + ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble, + APInt(64, 0x4330000000000000ULL)))); CV1.push_back( - ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL)))); + ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble, + APInt(64, 0x4530000000000000ULL)))); Constant *C1 = ConstantVector::get(CV1); SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16); @@ -8565,9 +8567,11 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const { } Constant *C; if (EltVT == MVT::f64) - C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))); + C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble, + APInt(64, ~(1ULL << 63)))); else - C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))); + C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle, + APInt(32, ~(1U << 31)))); C = ConstantVector::getSplat(NumElts, C); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy()); unsigned Alignment = cast(CPIdx)->getAlignment(); @@ -8597,9 +8601,11 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { } Constant *C; if (EltVT == MVT::f64) - C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))); + C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble, + APInt(64, 1ULL << 63))); else - C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))); + C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle, + APInt(32, 1U << 31))); C = ConstantVector::getSplat(NumElts, C); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy()); unsigned Alignment = cast(CPIdx)->getAlignment(); @@ -8643,13 +8649,15 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // First get the sign bit of second operand. 
SmallVector CV; if (SrcVT == MVT::f64) { - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0)))); + const fltSemantics &Sem = APFloat::IEEEdouble; + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 1ULL << 63)))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0)))); } else { - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); + const fltSemantics &Sem = APFloat::IEEEsingle; + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 1U << 31)))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0)))); } Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); @@ -8672,13 +8680,17 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // Clear first operand sign bit. CV.clear(); if (VT == MVT::f64) { - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0)))); + const fltSemantics &Sem = APFloat::IEEEdouble; + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, + APInt(64, ~(1ULL << 63))))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0)))); } else { - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); + const fltSemantics &Sem = APFloat::IEEEsingle; + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, + APInt(32, ~(1U << 31))))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0)))); } C = ConstantVector::get(CV); CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); -- cgit v1.2.3-18-g5258 From 9dd2a3b1f2c253e20262535bb89b1ab6cc680ece Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Tue, 22 Jan 2013 18:02:49 +0000 Subject: Initial patch for x32 ABI support. Add the x32 environment kind to the triple, and separate the concept of pointer size and callee save stack slot size, since they're not equal on x32. 
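A minimal illustration of the new distinction (the exact triple spelling here is an assumption for the example):

    // Under x32 the architecture is still x86_64; only the environment
    // component of the triple differs.
    Triple T("x86_64-unknown-linux-gnux32");
    bool is64Bit = T.getArch() == Triple::x86_64;      // true
    bool isX32 = T.getEnvironment() == Triple::GNUX32; // true
    // So for ELF the pointer size becomes 4 under x32 (8 otherwise), while
    // the callee-save stack slot size stays 8 on any x86-64 target.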
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173175 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 16488eb7ae..7815ae98c9 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -44,7 +44,7 @@ void X86MCAsmInfoDarwin::anchor() { }
 X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
 bool is64Bit = T.getArch() == Triple::x86_64;
 if (is64Bit)
- PointerSize = 8;
+ PointerSize = CalleeSaveStackSlotSize = 8;

 AssemblerDialect = AsmWriterFlavor;
@@ -76,8 +76,16 @@ X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
 void X86ELFMCAsmInfo::anchor() { }

 X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
- if (T.getArch() == Triple::x86_64)
- PointerSize = 8;
+ bool is64Bit = T.getArch() == Triple::x86_64;
+ bool isX32 = T.getEnvironment() == Triple::GNUX32;
+
+ // For ELF, x86-64 pointer size depends on the ABI.
+ // For x86-64 without the x32 ABI, pointer size is 8. For x86 and for x86-64
+ // with the x32 ABI, pointer size remains the default 4.
+ PointerSize = (is64Bit && !isX32) ? 8 : 4;
+
+ // OTOH, stack slot size is always 8 for x86-64, even with the x32 ABI.
+ CalleeSaveStackSlotSize = is64Bit ? 8 : 4;

 AssemblerDialect = AsmWriterFlavor;
-- cgit v1.2.3-18-g5258

From 341c5fbe840cffedc4155a2cf130626d2bba11b5 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Tue, 22 Jan 2013 18:05:59 +0000
Subject: X86: Make sure we account for the FMA4 register immediate value, otherwise rip-rel relocations will be off by one byte. PR15040.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173176 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrFormats.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 6151d5cce2..44e574d246 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -570,7 +570,7 @@ class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
 // FMA4 Instruction Templates
 class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
- : I<o, F, outs, ins, asm, pattern, itin>, TA,
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, TA,
 OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>;

 // XOP 2, 3 and 4 Operand Instruction Template
-- cgit v1.2.3-18-g5258

From 13d08bf4159d8ffb780bb2e29a03763c17543001 Mon Sep 17 00:00:00 2001
From: Michael Liao
Date: Tue, 22 Jan 2013 21:47:38 +0000
Subject: Fix an issue with pseudo atomic instruction DAG scheduling

- Add list of physical registers clobbered in pseudo atomic insts

Physical registers are clobbered when pseudo atomic instructions are expanded. Add them to the clobber list to prevent the DAG scheduler from mis-scheduling them after these insns are declared side-effect free.
- Add test case from Michael Kuperstein git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173200 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 4 ++-- lib/Target/X86/X86InstrCompiler.td | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b740897636..04758146e5 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12840,7 +12840,7 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, MachineFunction::iterator I = MBB; ++I; - assert(MI->getNumOperands() <= X86::AddrNumOperands + 2 && + assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 && "Unexpected number of operands"); assert(MI->hasOneMemOperand() && @@ -13072,7 +13072,7 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, MachineFunction::iterator I = MBB; ++I; - assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 && + assert(MI->getNumOperands() <= X86::AddrNumOperands + 7 && "Unexpected number of operands"); assert(MI->hasOneMemOperand() && diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 2a26a22bc1..f3879628c5 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -513,15 +513,19 @@ def CMOV_RFP80 : I<0, Pseudo, multiclass PSEUDO_ATOMIC_LOAD_BINOP { let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in { + let Defs = [EFLAGS, AL] in def NAME#8 : I<0, Pseudo, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val), !strconcat(mnemonic, "8 PSEUDO!"), []>; + let Defs = [EFLAGS, AX] in def NAME#16 : I<0, Pseudo,(outs GR16:$dst), (ins i16mem:$ptr, GR16:$val), !strconcat(mnemonic, "16 PSEUDO!"), []>; + let Defs = [EFLAGS, EAX] in def NAME#32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val), !strconcat(mnemonic, "32 PSEUDO!"), []>; + let Defs = [EFLAGS, RAX] in def NAME#64 : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val), !strconcat(mnemonic, "64 PSEUDO!"), []>; @@ -559,7 +563,8 @@ defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">; defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">; multiclass PSEUDO_ATOMIC_LOAD_BINOP6432 { - let usesCustomInserter = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0 in + let usesCustomInserter = 1, Defs = [EFLAGS, EAX, EDX], + mayLoad = 1, mayStore = 1, hasSideEffects = 0 in def NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), (ins i64mem:$ptr, GR32:$val1, GR32:$val2), !strconcat(mnemonic, "6432 PSEUDO!"), []>; -- cgit v1.2.3-18-g5258 From b3755e7fa2e386e9bd348eda6b1876ae09c1bf99 Mon Sep 17 00:00:00 2001 From: Renato Golin Date: Thu, 24 Jan 2013 23:01:00 +0000 Subject: Moving Cost Tables up to share with other targets git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173382 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetTransformInfo.cpp | 59 ++++++------------------------- 1 file changed, 11 insertions(+), 48 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 675c896d70..f3dfa0e413 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/CostTable.h" using namespace llvm; // Declare the pass initialization routine 
locally as target-specific passes @@ -119,44 +120,6 @@ llvm::createX86TargetTransformInfoPass(const X86TargetMachine *TM) { // //===----------------------------------------------------------------------===// -namespace { -struct X86CostTblEntry { - int ISD; - MVT Type; - unsigned Cost; -}; -} - -static int -FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) { - for (unsigned int i = 0; i < len; ++i) - if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty) - return i; - - // Could not find an entry. - return -1; -} - -namespace { -struct X86TypeConversionCostTblEntry { - int ISD; - MVT Dst; - MVT Src; - unsigned Cost; -}; -} - -static int -FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len, - int ISD, MVT Dst, MVT Src) { - for (unsigned int i = 0; i < len; ++i) - if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst) - return i; - - // Could not find an entry. - return -1; -} - X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); // TODO: Currently the __builtin_popcount() implementation using SSE3 @@ -206,7 +169,7 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - static const X86CostTblEntry AVX1CostTable[] = { + static const CostTblEntry AVX1CostTable[] = { // We don't have to scalarize unsupported ops. We can issue two half-sized // operations and we only need to extract the upper YMM half. // Two ops + 1 extract + 1 insert = 4. @@ -220,7 +183,7 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { // Look for AVX1 lowering tricks. if (ST->hasAVX()) { - int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD, + int Idx = CostTableLookup(AVX1CostTable, array_lengthof(AVX1CostTable), ISD, LT.second); if (Idx != -1) return LT.first * AVX1CostTable[Idx].Cost; @@ -254,7 +217,7 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { if (!SrcTy.isSimple() || !DstTy.isSimple()) return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); - static const X86TypeConversionCostTblEntry AVXConversionTbl[] = { + static const TypeConversionCostTblEntry AVXConversionTbl[] = { { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, @@ -273,7 +236,7 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { }; if (ST->hasAVX()) { - int Idx = FindInConvertTable(AVXConversionTbl, + int Idx = ConvertCostTableLookup(AVXConversionTbl, array_lengthof(AVXConversionTbl), ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); if (Idx != -1) @@ -293,7 +256,7 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - static const X86CostTblEntry SSE42CostTbl[] = { + static const CostTblEntry SSE42CostTbl[] = { { ISD::SETCC, MVT::v2f64, 1 }, { ISD::SETCC, MVT::v4f32, 1 }, { ISD::SETCC, MVT::v2i64, 1 }, @@ -302,7 +265,7 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, { ISD::SETCC, MVT::v16i8, 1 }, }; - static const X86CostTblEntry AVX1CostTbl[] = { + static const CostTblEntry AVX1CostTbl[] = { { ISD::SETCC, MVT::v4f64, 1 }, { ISD::SETCC, MVT::v8f32, 1 }, // AVX1 does not support 8-wide integer compare. 
@@ -312,7 +275,7 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, { ISD::SETCC, MVT::v32i8, 4 }, }; - static const X86CostTblEntry AVX2CostTbl[] = { + static const CostTblEntry AVX2CostTbl[] = { { ISD::SETCC, MVT::v4i64, 1 }, { ISD::SETCC, MVT::v8i32, 1 }, { ISD::SETCC, MVT::v16i16, 1 }, @@ -320,19 +283,19 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, }; if (ST->hasAVX2()) { - int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy); + int Idx = CostTableLookup(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy); if (Idx != -1) return LT.first * AVX2CostTbl[Idx].Cost; } if (ST->hasAVX()) { - int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy); + int Idx = CostTableLookup(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy); if (Idx != -1) return LT.first * AVX1CostTbl[Idx].Cost; } if (ST->hasSSE42()) { - int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy); + int Idx = CostTableLookup(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy); if (Idx != -1) return LT.first * SSE42CostTbl[Idx].Cost; } -- cgit v1.2.3-18-g5258 From a5597f0eaf1f93c6d0bc641a0cc54ecffb33955a Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Fri, 25 Jan 2013 22:07:43 +0000 Subject: In this patch, we teach X86_64TargetMachine that it has a ILP32 (defined by the x32 ABI) mode, in which case its pointers are 32-bits in size. This knowledge is also added to X86RegisterInfo that now returns the appropriate registers in getPointerRegClass. There are many outcomes to this change. In order to keep the patches separate and manageable, we start by focusing on some simple testable cases. The patch adds a test with passing a pointer to a function - focusing on the difference between the two data models for x86-64. Another test is added for handling of 'sret' arguments (and functionality is added in X86ISelLowering to make it work). A note on naming: the "x32 ABI" document refers to the AMD64 architecture (in LLVM it's distinguished by being is64Bits() in the x86 subtarget) with two variations: the LP64 (default) data model, and the ILP32 data model. This patch adds predicates to the subtarget which are consistent with this naming scheme. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173503 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 25 ++++++++++++++----------- lib/Target/X86/X86RegisterInfo.cpp | 9 +++++---- lib/Target/X86/X86Subtarget.h | 15 ++++++++++++++- lib/Target/X86/X86TargetMachine.cpp | 8 ++++++-- 4 files changed, 39 insertions(+), 18 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 04758146e5..0c12410361 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1646,10 +1646,10 @@ X86TargetLowering::LowerReturn(SDValue Chain, Flag = Chain.getValue(1); } - // The x86-64 ABI for returning structs by value requires that we copy - // the sret argument into %rax for the return. We saved the argument into - // a virtual register in the entry block, so now we copy the value out - // and into %rax. + // The x86-64 ABIs require that for returning structs by value we copy + // the sret argument into %rax/%eax (depending on ABI) for the return. + // We saved the argument into a virtual register in the entry block, + // so now we copy the value out and into %rax/%eax. 
if (Subtarget->is64Bit() && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) { MachineFunction &MF = DAG.getMachineFunction(); @@ -1659,11 +1659,12 @@ X86TargetLowering::LowerReturn(SDValue Chain, "SRetReturnReg should have been set in LowerFormalArguments()."); SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); - Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag); + unsigned RetValReg = Subtarget->isTarget64BitILP32() ? X86::EAX : X86::RAX; + Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag); Flag = Chain.getValue(1); - // RAX now acts like a return value. - MRI.addLiveOut(X86::RAX); + // RAX/EAX now acts like a return value. + MRI.addLiveOut(RetValReg); } RetOps[0] = Chain; // Update chain. @@ -2015,14 +2016,16 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, InVals.push_back(ArgValue); } - // The x86-64 ABI for returning structs by value requires that we copy - // the sret argument into %rax for the return. Save the argument into - // a virtual register so that we can access it from the return points. + // The x86-64 ABIs require that for returning structs by value we copy + // the sret argument into %rax/%eax (depending on ABI) for the return. + // Save the argument into a virtual register so that we can access it + // from the return points. if (Is64Bit && MF.getFunction()->hasStructRetAttr()) { X86MachineFunctionInfo *FuncInfo = MF.getInfo(); unsigned Reg = FuncInfo->getSRetReturnReg(); if (!Reg) { - Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); + MVT PtrTy = getPointerTy(); + Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); FuncInfo->setSRetReturnReg(Reg); } SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]); diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 58064b8695..1dd1e41af8 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -177,20 +177,21 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{ const TargetRegisterClass * X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const { + const X86Subtarget &Subtarget = TM.getSubtarget(); switch (Kind) { default: llvm_unreachable("Unexpected Kind in getPointerRegClass!"); case 0: // Normal GPRs. - if (TM.getSubtarget().is64Bit()) + if (Subtarget.isTarget64BitLP64()) return &X86::GR64RegClass; return &X86::GR32RegClass; case 1: // Normal GPRs except the stack pointer (for encoding reasons). - if (TM.getSubtarget().is64Bit()) + if (Subtarget.isTarget64BitLP64()) return &X86::GR64_NOSPRegClass; return &X86::GR32_NOSPRegClass; case 2: // Available for tailcall (not callee-saved GPRs). - if (TM.getSubtarget().isTargetWin64()) + if (Subtarget.isTargetWin64()) return &X86::GR64_TCW64RegClass; - if (TM.getSubtarget().is64Bit()) + else if (Subtarget.is64Bit()) return &X86::GR64_TCRegClass; const Function *F = MF.getFunction(); diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 080f4cfeca..b325f62126 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -194,7 +194,20 @@ public: /// instruction. void AutoDetectSubtargetFeatures(); - bool is64Bit() const { return In64BitMode; } + /// Is this x86_64? (disregarding specific ABI / programming model) + bool is64Bit() const { + return In64BitMode; + } + + /// Is this x86_64 with the ILP32 programming model (x32 ABI)? 
+ bool isTarget64BitILP32() const { + return In64BitMode && (TargetTriple.getEnvironment() == Triple::GNUX32); + } + + /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)? + bool isTarget64BitLP64() const { + return In64BitMode && (TargetTriple.getEnvironment() != Triple::GNUX32); + } PICStyles::Style getPICStyle() const { return PICStyle; } void setPICStyle(PICStyles::Style Style) { PICStyle = Style; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 799f140407..8aa58a2042 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -59,8 +59,12 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true), - DL("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" - "n8:16:32:64-S128"), + // The x32 ABI dictates the ILP32 programming model for x64. + DL(getSubtargetImpl()->isTarget64BitILP32() ? + "e-p:32:32-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" + "n8:16:32:64-S128" : + "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" + "n8:16:32:64-S128"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), -- cgit v1.2.3-18-g5258 From 11f2bf7f15a7d3b3df500f3f3e76355c888c23c7 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 26 Jan 2013 11:44:21 +0000 Subject: X86: Do splat promotion later, so the optimizer can chew on it first. This catches many cases where we can emit a more efficient shuffle for a specific mask or when the mask contains undefs. Once the splat is lowered to unpacks we can't do that anymore. There is a possibility of moving the promotion after pshufb matching, but I'm not sure if pshufb with a mask loaded from memory is faster than 3 shuffles, so I avoided that for now. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173569 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0c12410361..6daa9b65e6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5839,6 +5839,11 @@ LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget, } } + // Promote splats to a larger type which usually leads to more efficient code. + // FIXME: Is this true if pshufb is available? + if (SVOp->isSplat()) + return PromoteSplat(SVOp, DAG); + // If we have SSSE3, and all words of the result are from 1 input vector, // case 2 is generated, otherwise case 3 is generated. If no SSSE3 // is present, fall back to case 4. @@ -5972,6 +5977,11 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, DebugLoc dl = SVOp->getDebugLoc(); ArrayRef MaskVals = SVOp->getMask(); + // Promote splats to a larger type which usually leads to more efficient code. + // FIXME: Is this true if pshufb is available? + if (SVOp->isSplat()) + return PromoteSplat(SVOp, DAG); + // If we have SSSE3, case 1 is generated when all result bytes come from // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is // present, fall back to case 3. 
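For reference, a minimal sketch of what classifying a shuffle as a splat involves; this is an assumed helper for illustration, not the actual SVOp->isSplat() implementation. Every defined mask element must select the same source element:

    #include "llvm/ADT/ArrayRef.h"
    // A mask such as <3,3,3,3> or <5,-1,5,5> (undef allowed) is a splat.
    static bool isSplatMask(llvm::ArrayRef<int> Mask) {
      int Elt = -1;
      for (int M : Mask) {
        if (M < 0) continue;             // undef lane matches anything
        if (Elt < 0) Elt = M;            // remember the first defined element
        else if (M != Elt) return false; // two different elements: not a splat
      }
      return Elt >= 0;
    }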
@@ -6669,20 +6679,10 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { // Handle splat operations if (SVOp->isSplat()) { - unsigned NumElem = VT.getVectorNumElements(); - // Use vbroadcast whenever the splat comes from a foldable load SDValue Broadcast = LowerVectorBroadcast(Op, DAG); if (Broadcast.getNode()) return Broadcast; - - // Handle splats by matching through known shuffle masks - if ((VT.is128BitVector() && NumElem <= 4) || - (VT.is256BitVector() && NumElem <= 8)) - return SDValue(); - - // All remaning splats are promoted to target supported vector shuffles. - return PromoteSplat(SVOp, DAG); } // Check integer expanding shuffles. -- cgit v1.2.3-18-g5258 From 200b306f2006533a0e7a0ca75cb3103620e7aa84 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 26 Jan 2013 13:31:37 +0000 Subject: X86: Decode PALIGN operands so I don't have to do it in my head. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173572 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/InstPrinter/X86InstComments.cpp | 22 ++++++++++++++++++++++ lib/Target/X86/Utils/X86ShuffleDecode.cpp | 8 ++++++++ lib/Target/X86/Utils/X86ShuffleDecode.h | 2 ++ lib/Target/X86/X86ISelLowering.cpp | 5 ++++- 4 files changed, 36 insertions(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 64ac5e685f..d8a45ea973 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -69,6 +69,28 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeMOVHLPSMask(2, ShuffleMask); break; + case X86::PALIGNR128rr: + case X86::VPALIGNR128rr: + Src1Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PALIGNR128rm: + case X86::VPALIGNR128rm: + Src2Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePALIGNMask(MVT::v16i8, + MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + break; + case X86::VPALIGNR256rr: + Src1Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPALIGNR256rm: + Src2Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePALIGNMask(MVT::v32i8, + MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + case X86::PSHUFDri: case X86::VPSHUFDri: Src1Name = getRegName(MI->getOperand(1).getReg()); diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 8b87c1f9c8..9694808e64 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -61,6 +61,14 @@ void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl &ShuffleMask) { ShuffleMask.push_back(NElts+i); } +void DecodePALIGNMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { + unsigned NumElts = VT.getVectorNumElements(); + unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8); + + for (unsigned i = 0; i != NumElts; ++i) + ShuffleMask.push_back((i + Offset) % (NumElts * 2)); +} + /// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*. /// VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. 
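A standalone model of the decode just added, as a worked example (illustrative, mirroring the loop above): for v16i8 with Imm = 4 it yields the mask <4,5,...,19>, i.e. the 32-byte concatenation of the two sources shifted down by four bytes, with indices of 16 and above selecting the second source.

    #include <vector>
    std::vector<int> decodePALIGN(unsigned NumElts, unsigned EltBytes, unsigned Imm) {
      std::vector<int> Mask;
      unsigned Offset = Imm * EltBytes; // EltBytes is 1 for the v16i8/v32i8 uses
      for (unsigned i = 0; i != NumElts; ++i)
        Mask.push_back((i + Offset) % (NumElts * 2));
      return Mask;
    }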
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 70d8171a81..69ce4432ed 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -35,6 +35,8 @@ void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl &ShuffleMask); // <0,2> or <0,1,4,5> void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl &ShuffleMask); +void DecodePALIGNMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); + void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); void DecodePSHUFHWMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6daa9b65e6..76ec12c740 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4592,6 +4592,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, case X86ISD::MOVLHPS: DecodeMOVLHPSMask(NumElems, Mask); break; + case X86ISD::PALIGN: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodePALIGNMask(VT, cast(ImmN)->getZExtValue(), Mask); + break; case X86ISD::PSHUFD: case X86ISD::VPERMILP: ImmN = N->getOperand(N->getNumOperands()-1); @@ -4635,7 +4639,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, case X86ISD::MOVLPS: case X86ISD::MOVSHDUP: case X86ISD::MOVSLDUP: - case X86ISD::PALIGN: // Not yet implemented return false; default: llvm_unreachable("unknown target shuffle node"); -- cgit v1.2.3-18-g5258 From 4aee1bb2223e59efb814a694edaecd07a3418da0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 28 Jan 2013 06:48:25 +0000 Subject: Fix inconsistent usage of PALIGN and PALIGNR when referring to the same instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173667 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/InstPrinter/X86InstComments.cpp | 12 ++++----- lib/Target/X86/Utils/X86ShuffleDecode.cpp | 3 ++- lib/Target/X86/Utils/X86ShuffleDecode.h | 2 +- lib/Target/X86/X86ISelLowering.cpp | 14 +++++------ lib/Target/X86/X86ISelLowering.h | 2 +- lib/Target/X86/X86InstrFragmentsSIMD.td | 2 +- lib/Target/X86/X86InstrSSE.td | 34 +++++++++++++------------- 7 files changed, 35 insertions(+), 34 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index d8a45ea973..43a8f0f865 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -77,9 +77,9 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::VPALIGNR128rm: Src2Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); - DecodePALIGNMask(MVT::v16i8, - MI->getOperand(MI->getNumOperands()-1).getImm(), - ShuffleMask); + DecodePALIGNRMask(MVT::v16i8, + MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); break; case X86::VPALIGNR256rr: Src1Name = getRegName(MI->getOperand(2).getReg()); @@ -87,9 +87,9 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::VPALIGNR256rm: Src2Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); - DecodePALIGNMask(MVT::v32i8, - MI->getOperand(MI->getNumOperands()-1).getImm(), - ShuffleMask); + DecodePALIGNRMask(MVT::v32i8, + MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); case X86::PSHUFDri: case X86::VPSHUFDri: diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 
9694808e64..b490f27025 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -61,7 +61,8 @@ void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl &ShuffleMask) { ShuffleMask.push_back(NElts+i); } -void DecodePALIGNMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { +void DecodePALIGNRMask(MVT VT, unsigned Imm, + SmallVectorImpl &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8); diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 69ce4432ed..017ab325ec 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -35,7 +35,7 @@ void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl &ShuffleMask); // <0,2> or <0,1,4,5> void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl &ShuffleMask); -void DecodePALIGNMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); +void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 76ec12c740..a4eae0a8f8 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3004,7 +3004,7 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: case X86ISD::SHUFP: - case X86ISD::PALIGN: + case X86ISD::PALIGNR: case X86ISD::MOVLHPS: case X86ISD::MOVLHPD: case X86ISD::MOVHLPS: @@ -3054,7 +3054,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, SelectionDAG &DAG) { switch(Opc) { default: llvm_unreachable("Unknown x86 shuffle node"); - case X86ISD::PALIGN: + case X86ISD::PALIGNR: case X86ISD::SHUFP: case X86ISD::VPERM2X128: return DAG.getNode(Opc, dl, VT, V1, V2, @@ -4592,9 +4592,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, case X86ISD::MOVLHPS: DecodeMOVLHPSMask(NumElems, Mask); break; - case X86ISD::PALIGN: + case X86ISD::PALIGNR: ImmN = N->getOperand(N->getNumOperands()-1); - DecodePALIGNMask(VT, cast(ImmN)->getZExtValue(), Mask); + DecodePALIGNRMask(VT, cast(ImmN)->getZExtValue(), Mask); break; case X86ISD::PSHUFD: case X86ISD::VPERMILP: @@ -6932,7 +6932,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // nodes, and remove one by one until they don't return Op anymore. 
if (isPALIGNRMask(M, VT, Subtarget)) - return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2, + return getTargetShuffleNode(X86ISD::PALIGNR, dl, VT, V1, V2, getShufflePALIGNRImmediate(SVOp), DAG); @@ -12435,7 +12435,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::PTEST: return "X86ISD::PTEST"; case X86ISD::TESTP: return "X86ISD::TESTP"; - case X86ISD::PALIGN: return "X86ISD::PALIGN"; + case X86ISD::PALIGNR: return "X86ISD::PALIGNR"; case X86ISD::PSHUFD: return "X86ISD::PSHUFD"; case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW"; case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW"; @@ -17416,7 +17416,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget); case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget); case X86ISD::SHUFP: // Handle all target specific shuffles - case X86ISD::PALIGN: + case X86ISD::PALIGNR: case X86ISD::UNPCKH: case X86ISD::UNPCKL: case X86ISD::MOVHLPS: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 6d5e8c2180..5e84e277c0 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -294,7 +294,7 @@ namespace llvm { TESTP, // Several flavors of instructions with vector shuffle behaviors. - PALIGN, + PALIGNR, PSHUFD, PSHUFHW, PSHUFLW, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 7025e93fa1..2a72fb6f7b 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -160,7 +160,7 @@ def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>, SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>; -def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>; +def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>; def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 18c3dfe8f2..0979752757 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5167,7 +5167,7 @@ defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", // SSSE3 - Packed Align Instruction Patterns //===---------------------------------------------------------------------===// -multiclass ssse3_palign { +multiclass ssse3_palignr { let neverHasSideEffects = 1 in { def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), @@ -5187,7 +5187,7 @@ multiclass ssse3_palign { } } -multiclass ssse3_palign_y { +multiclass ssse3_palignr_y { let neverHasSideEffects = 1 in { def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), @@ -5204,42 +5204,42 @@ multiclass ssse3_palign_y { } let Predicates = [HasAVX] in - defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V; + defm VPALIGN : ssse3_palignr<"vpalignr", 0>, VEX_4V; let Predicates = [HasAVX2] in - defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V, VEX_L; + defm VPALIGN : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L; let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in - defm PALIGN : ssse3_palign<"palignr">; + defm PALIGN : ssse3_palignr<"palignr">; let Predicates = [HasAVX2] in { -def : Pat<(v8i32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))), +def : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), 
(VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; -def : Pat<(v8f32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))), +def : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; -def : Pat<(v16i16 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))), +def : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; -def : Pat<(v32i8 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))), +def : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; } let Predicates = [HasAVX] in { -def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; } let Predicates = [UseSSSE3] in { -def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; } -- cgit v1.2.3-18-g5258 From 53597b2c5cd0ace8683fd7aab33f8d40c085a49d Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 28 Jan 2013 07:19:11 +0000 Subject: Add missing break in 256-bit palignr comment printing. No test case yet because the comment itself is still wrong. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173669 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/InstPrinter/X86InstComments.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 43a8f0f865..865930af72 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -90,6 +90,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodePALIGNRMask(MVT::v32i8, MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); + break; case X86::PSHUFDri: case X86::VPSHUFDri: -- cgit v1.2.3-18-g5258 From 467016e58d57021b14f2ae562d221f00b07cb254 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 28 Jan 2013 07:41:18 +0000 Subject: Fix 256-bit PALIGNR comment decoding to understand that it works on independent 256-bit lanes. 
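A worked illustration of the change below: for v32i8 with Imm = 1, the lane-aware decode produces the mask <1..15, 32, 17..31, 48>; each 128-bit lane shifts independently, pulling the top element of a lane from the bottom of the matching lane of the other source, whereas the previous decode produced the lane-crossing mask <1, 2, ..., 32>.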
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173674 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/Utils/X86ShuffleDecode.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index b490f27025..bbd490411f 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -66,8 +66,17 @@ void DecodePALIGNRMask(MVT VT, unsigned Imm, unsigned NumElts = VT.getVectorNumElements(); unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8); - for (unsigned i = 0; i != NumElts; ++i) - ShuffleMask.push_back((i + Offset) % (NumElts * 2)); + unsigned NumLanes = VT.getSizeInBits() / 128; + unsigned NumLaneElts = NumElts / NumLanes; + + for (unsigned l = 0; l != NumElts; l += NumLaneElts) { + for (unsigned i = 0; i != NumLaneElts; ++i) { + unsigned Base = i + Offset; + // if i+offset is out of this lane then we actually need the other source + if (Base >= NumLaneElts) Base += NumElts - NumLaneElts; + ShuffleMask.push_back(Base + l); + } + } } /// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*. -- cgit v1.2.3-18-g5258 From 8688a58c53b46d2dda9bf50dafd5195790a7ed58 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Tue, 29 Jan 2013 02:32:37 +0000 Subject: Teach SDISel to combine fsin / fcos into a fsincos node if the following conditions are met: 1. They share the same operand and are in the same BB. 2. Both outputs are used. 3. The target has a native instruction that maps to ISD::FSINCOS node or the target provides a sincos library call. Implemented the generic optimization in sdisel and enabled it for Mac OSX. Also added an additional optimization for x86_64 Mac OSX by using an alternative entry point __sincos_stret which returns the two results in xmm0 / xmm1. rdar://13087969 PR13204 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173755 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 95 ++++++++++++++++++++++++++++++++------ lib/Target/X86/X86ISelLowering.h | 2 +- lib/Target/X86/X86Subtarget.cpp | 5 ++ lib/Target/X86/X86Subtarget.h | 4 ++ 4 files changed, 91 insertions(+), 15 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a4eae0a8f8..ca606daaba 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -605,10 +605,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FGETSIGN, MVT::i32, Custom); // We don't support sin/cos/fmod - setOperationAction(ISD::FSIN , MVT::f64, Expand); - setOperationAction(ISD::FCOS , MVT::f64, Expand); - setOperationAction(ISD::FSIN , MVT::f32, Expand); - setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSIN , MVT::f64, Expand); + setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FSIN , MVT::f32, Expand); + setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); // Expand FP immediates into loads from the stack, except for the special // cases we handle. 
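The source-level pattern this targets, as an illustration of the three conditions above (shared operand, same basic block, both results used):

    #include <cmath>
    void polar_to_xy(double r, double t, double &x, double &y) {
      // fcos and fsin share the operand t, live in one block, and both
      // results are used, so they may now become a single sincos (or, on
      // x86_64 Mac OS X, __sincos_stret) call instead of two libcalls.
      x = r * std::cos(t);
      y = r * std::sin(t);
    }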
@@ -633,8 +635,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); // We don't support sin/cos/fmod - setOperationAction(ISD::FSIN , MVT::f32, Expand); - setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSIN , MVT::f32, Expand); + setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); // Special cases we handle for FP constants. addLegalFPImmediate(APFloat(+0.0f)); // xorps @@ -644,8 +647,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS if (!TM.Options.UnsafeFPMath) { - setOperationAction(ISD::FSIN , MVT::f64 , Expand); - setOperationAction(ISD::FCOS , MVT::f64 , Expand); + setOperationAction(ISD::FSIN , MVT::f64, Expand); + setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); } } else if (!TM.Options.UseSoftFloat) { // f32 and f64 in x87. @@ -659,10 +663,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); if (!TM.Options.UnsafeFPMath) { - setOperationAction(ISD::FSIN , MVT::f32 , Expand); - setOperationAction(ISD::FSIN , MVT::f64 , Expand); - setOperationAction(ISD::FCOS , MVT::f32 , Expand); - setOperationAction(ISD::FCOS , MVT::f64 , Expand); + setOperationAction(ISD::FSIN , MVT::f64, Expand); + setOperationAction(ISD::FSIN , MVT::f32, Expand); + setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); } addLegalFPImmediate(APFloat(+0.0)); // FLD0 addLegalFPImmediate(APFloat(+1.0)); // FLD1 @@ -699,8 +705,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } if (!TM.Options.UnsafeFPMath) { - setOperationAction(ISD::FSIN , MVT::f80 , Expand); - setOperationAction(ISD::FCOS , MVT::f80 , Expand); + setOperationAction(ISD::FSIN , MVT::f80, Expand); + setOperationAction(ISD::FCOS , MVT::f80, Expand); + setOperationAction(ISD::FSINCOS, MVT::f80, Expand); } setOperationAction(ISD::FFLOOR, MVT::f80, Expand); @@ -748,7 +755,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand); setOperationAction(ISD::FABS, VT, Expand); setOperationAction(ISD::FSIN, VT, Expand); + setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FMA, VT, Expand); setOperationAction(ISD::FPOWI, VT, Expand); @@ -1281,6 +1290,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setLibcallName(RTLIB::SRA_I128, 0); } + // Combine sin / cos into one node or libcall if possible. + if (Subtarget->hasSinCos()) { + setLibcallName(RTLIB::SINCOS_F32, "sincosf"); + setLibcallName(RTLIB::SINCOS_F64, "sincos"); + if (Subtarget->isTargetDarwin() && Subtarget->is64Bit()) { + // For MacOSX, we don't want to the normal expansion of a libcall to + // sincos. We want to issue a libcall to __sincos_stret to avoid memory + // traffic. 
+ setOperationAction(ISD::FSINCOS, MVT::f64, Custom); + setOperationAction(ISD::FSINCOS, MVT::f32, Custom); + } + } + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); @@ -12014,6 +12036,50 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { Op.getOperand(1), Op.getOperand(2)); } +SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget->isTargetDarwin()); + + // For MacOSX, we want to call an alternative entry point: __sincos_stret, + // which returns the values in two XMM registers. + DebugLoc dl = Op.getDebugLoc(); + SDValue Arg = Op.getOperand(0); + EVT ArgVT = Arg.getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + + ArgListTy Args; + ArgListEntry Entry; + + Entry.Node = Arg; + Entry.Ty = ArgTy; + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + + const char *LibcallName = (ArgVT == MVT::f64) + ? "__sincos_stret" : "__sincosf_stret"; + SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); + + StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL); + TargetLowering:: + CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, + false, false, false, false, 0, + CallingConv::C, /*isTaillCall=*/false, + /*doesNotRet=*/false, /*isReturnValueUsed*/true, + Callee, Args, DAG, dl); + std::pair CallResult = LowerCallTo(CLI); +#if 1 + return CallResult.first; +#else + SDValue RetSin = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, ArgVT, + CallResult.first, DAG.getIntPtrConstant(0)); + SDValue RetCos = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, ArgVT, + CallResult.first, DAG.getIntPtrConstant(1)); + + SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); + return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, RetSin, RetCos); +#endif +} + /// LowerOperation - Provide custom lowering hooks for some operations. /// SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -12096,6 +12162,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADD: return LowerADD(Op, DAG); case ISD::SUB: return LowerSUB(Op, DAG); case ISD::SDIV: return LowerSDIV(Op, DAG); + case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); } } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 5e84e277c0..6758ed1404 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -838,8 +838,8 @@ namespace llvm { SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; // Utility functions to help LowerVECTOR_SHUFFLE & LowerBUILD_VECTOR SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 53c28f4fce..dad95c6800 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -155,6 +155,11 @@ const char *X86Subtarget::getBZeroEntry() const { return 0; } +bool X86Subtarget::hasSinCos() const { + return getTargetTriple().isMacOSX() && + !getTargetTriple().isMacOSXVersionLT(10, 9); +} + /// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls /// to immediate address. 
bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const { diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index b325f62126..eb587a5761 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -328,6 +328,10 @@ public: /// memset with zero passed as the second argument. Otherwise it /// returns null. const char *getBZeroEntry() const; + + /// This function returns true if the target has sincos() routine in its + /// compiler runtime or math libraries. + bool hasSinCos() const; /// enablePostRAScheduler - run for Atom optimization. bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, -- cgit v1.2.3-18-g5258 From e6482fabd20a2a5b4f81aff55812782f3b617514 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 29 Jan 2013 07:54:31 +0000 Subject: Merge SSE and AVX shuffle instructions in the comment printer. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173777 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/InstPrinter/X86InstComments.cpp | 112 ++++--------------------- 1 file changed, 14 insertions(+), 98 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 865930af72..0f6eeb19bc 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -34,10 +34,6 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, switch (MI->getOpcode()) { case X86::INSERTPSrr: - Src1Name = getRegName(MI->getOperand(0).getReg()); - Src2Name = getRegName(MI->getOperand(2).getReg()); - DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask); - break; case X86::VINSERTPSrr: DestName = getRegName(MI->getOperand(0).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -46,10 +42,6 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::MOVLHPSrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeMOVLHPSMask(2, ShuffleMask); - break; case X86::VMOVLHPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -58,10 +50,6 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::MOVHLPSrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeMOVHLPSMask(2, ShuffleMask); - break; case X86::VMOVHLPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -154,15 +142,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::PUNPCKHBWrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKHBWrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v16i8, ShuffleMask); - break; case X86::VPUNPCKHBWrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::PUNPCKHBWrm: case X86::VPUNPCKHBWrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -177,15 +160,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeUNPCKHMask(MVT::v32i8, ShuffleMask); break; case X86::PUNPCKHWDrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. 
- case X86::PUNPCKHWDrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v8i16, ShuffleMask); - break; case X86::VPUNPCKHWDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::PUNPCKHWDrm: case X86::VPUNPCKHWDrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -200,15 +178,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeUNPCKHMask(MVT::v16i16, ShuffleMask); break; case X86::PUNPCKHDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKHDQrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v4i32, ShuffleMask); - break; case X86::VPUNPCKHDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::PUNPCKHDQrm: case X86::VPUNPCKHDQrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -223,15 +196,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeUNPCKHMask(MVT::v8i32, ShuffleMask); break; case X86::PUNPCKHQDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKHQDQrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v2i64, ShuffleMask); - break; case X86::VPUNPCKHQDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::PUNPCKHQDQrm: case X86::VPUNPCKHQDQrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -247,15 +215,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::PUNPCKLBWrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKLBWrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v16i8, ShuffleMask); - break; case X86::VPUNPCKLBWrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::PUNPCKLBWrm: case X86::VPUNPCKLBWrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -270,15 +233,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeUNPCKLMask(MVT::v32i8, ShuffleMask); break; case X86::PUNPCKLWDrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKLWDrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v8i16, ShuffleMask); - break; case X86::VPUNPCKLWDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::PUNPCKLWDrm: case X86::VPUNPCKLWDrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -293,15 +251,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeUNPCKLMask(MVT::v16i16, ShuffleMask); break; case X86::PUNPCKLDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKLDQrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v4i32, ShuffleMask); - break; case X86::VPUNPCKLDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
+ case X86::PUNPCKLDQrm: case X86::VPUNPCKLDQrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -316,15 +269,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeUNPCKLMask(MVT::v8i32, ShuffleMask); break; case X86::PUNPCKLQDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKLQDQrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v2i64, ShuffleMask); - break; case X86::VPUNPCKLQDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::PUNPCKLQDQrm: case X86::VPUNPCKLQDQrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -340,16 +288,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::SHUFPDrri: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::SHUFPDrmi: - DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; case X86::VSHUFPDrri: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::SHUFPDrmi: case X86::VSHUFPDrmi: DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); @@ -367,16 +309,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::SHUFPSrri: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::SHUFPSrmi: - DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; case X86::VSHUFPSrri: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::SHUFPSrmi: case X86::VSHUFPSrmi: DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); @@ -394,15 +330,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::UNPCKLPDrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::UNPCKLPDrm: - DecodeUNPCKLMask(MVT::v2f64, ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; case X86::VUNPCKLPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::UNPCKLPDrm: case X86::VUNPCKLPDrm: DecodeUNPCKLMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -417,15 +348,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::UNPCKLPSrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::UNPCKLPSrm: - DecodeUNPCKLMask(MVT::v4f32, ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; case X86::VUNPCKLPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::UNPCKLPSrm: case X86::VUNPCKLPSrm: DecodeUNPCKLMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -440,15 +366,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::UNPCKHPDrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. 
- case X86::UNPCKHPDrm: - DecodeUNPCKHMask(MVT::v2f64, ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; case X86::VUNPCKHPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::UNPCKHPDrm: case X86::VUNPCKHPDrm: DecodeUNPCKHMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -463,15 +384,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::UNPCKHPSrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::UNPCKHPSrm: - DecodeUNPCKHMask(MVT::v4f32, ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; case X86::VUNPCKHPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::UNPCKHPSrm: case X86::VUNPCKHPSrm: DecodeUNPCKHMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); -- cgit v1.2.3-18-g5258 From 7c1ac767691b2cb5d3367e667e51714f34eb675b Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 29 Jan 2013 14:05:57 +0000 Subject: Fix typo in X86BaseInfo.h that I introduced in r157818. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173798 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 7ea1961dec..9e68388cf2 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -104,7 +104,7 @@ namespace X86II { /// MO_TLSLD - On a symbol operand this indicates that the immediate is /// the offset of the GOT entry with the TLS index for the module that - /// contains the symbol. When this index is passed to a call to to + /// contains the symbol. When this index is passed to a call to /// __tls_get_addr, the function will return the base address of the TLS /// block for the symbol. Used in the x86-64 local dynamic TLS access model. /// @@ -114,7 +114,7 @@ namespace X86II { /// MO_TLSLDM - On a symbol operand this indicates that the immediate is /// the offset of the GOT entry with the TLS index for the module that - /// contains the symbol. When this index is passed to a call to to + /// contains the symbol. When this index is passed to a call to /// ___tls_get_addr, the function will return the base address of the TLS /// block for the symbol. Used in the IA32 local dynamic TLS access model. /// -- cgit v1.2.3-18-g5258 From 9b5a14d59d827166d1fea5368060c7462d8f1db1 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Tue, 29 Jan 2013 18:08:22 +0000 Subject: Remove dead code. 
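(The block removed below was an unused alternative for splitting the aggregate that the runtime call returns. For context, that entry point behaves conceptually like the following sketch, inferred from the StructType built in LowerFSINCOS; the field order shown is an assumption:)

    struct sincos_result { double sinval, cosval; }; // returned in xmm0/xmm1
    extern "C" sincos_result __sincos_stret(double);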
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173812 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ca606daaba..0a53a3e291 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12067,17 +12067,7 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { /*doesNotRet=*/false, /*isReturnValueUsed*/true, Callee, Args, DAG, dl); std::pair CallResult = LowerCallTo(CLI); -#if 1 return CallResult.first; -#else - SDValue RetSin = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, ArgVT, - CallResult.first, DAG.getIntPtrConstant(0)); - SDValue RetCos = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, ArgVT, - CallResult.first, DAG.getIntPtrConstant(1)); - - SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); - return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, RetSin, RetCos); -#endif } /// LowerOperation - Provide custom lowering hooks for some operations. -- cgit v1.2.3-18-g5258 From a66f40a8cc685b2869e7f8d988f9a17439875ece Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 30 Jan 2013 22:56:35 +0000 Subject: Restrict sin/cos optimization to 64-bit only for now. 32-bit is a bit messy and less critical. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173987 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 21 ++++++++++++--------- lib/Target/X86/X86Subtarget.cpp | 3 ++- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0a53a3e291..36d1ad4267 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1294,7 +1294,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (Subtarget->hasSinCos()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); setLibcallName(RTLIB::SINCOS_F64, "sincos"); - if (Subtarget->isTargetDarwin() && Subtarget->is64Bit()) { + if (Subtarget->isTargetDarwin()) { // For MacOSX, we don't want to the normal expansion of a libcall to // sincos. We want to issue a libcall to __sincos_stret to avoid memory // traffic. @@ -12037,7 +12037,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { - assert(Subtarget->isTargetDarwin()); + assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit()); // For MacOSX, we want to call an alternative entry point: __sincos_stret, // which returns the values in two XMM registers. @@ -12054,18 +12054,21 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); - + + // Only optimize x86_64 for now. i386 is a bit messy. For f32, + // the small struct {f32, f32} is returned in (eax, edx). For f64, + // the results are returned via SRet in memory. const char *LibcallName = (ArgVT == MVT::f64) ? 
"__sincos_stret" : "__sincosf_stret"; SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); - + StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL); TargetLowering:: - CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, - false, false, false, false, 0, - CallingConv::C, /*isTaillCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed*/true, - Callee, Args, DAG, dl); + CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, + false, false, false, false, 0, + CallingConv::C, /*isTaillCall=*/false, + /*doesNotRet=*/false, /*isReturnValueUsed*/true, + Callee, Args, DAG, dl); std::pair CallResult = LowerCallTo(CLI); return CallResult.first; } diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index dad95c6800..6305501a21 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -157,7 +157,8 @@ const char *X86Subtarget::getBZeroEntry() const { bool X86Subtarget::hasSinCos() const { return getTargetTriple().isMacOSX() && - !getTargetTriple().isMacOSXVersionLT(10, 9); + !getTargetTriple().isMacOSXVersionLT(10, 9) && + is64Bit(); } /// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls -- cgit v1.2.3-18-g5258 From a9bd4b4647694f1384a8e1c891931ba7bdff364f Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 31 Jan 2013 00:50:46 +0000 Subject: Check and allow floating point registers to select the size of the register for inline asm. This conforms to how gcc allows for effective casting of inputs into gprs (fprs is already handled). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174008 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 36d1ad4267..c34010c978 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -18170,7 +18170,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, Res.first = DestReg; Res.second = &X86::GR8RegClass; } - } else if (VT == MVT::i32) { + } else if (VT == MVT::i32 || VT == MVT::f32) { unsigned DestReg = 0; switch (Res.first) { default: break; @@ -18187,7 +18187,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, Res.first = DestReg; Res.second = &X86::GR32RegClass; } - } else if (VT == MVT::i64) { + } else if (VT == MVT::i64 || VT == MVT::f64) { unsigned DestReg = 0; switch (Res.first) { default: break; -- cgit v1.2.3-18-g5258 From e187e259963466cc106b8a23c0d5b4d44fdd15a6 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 31 Jan 2013 00:50:48 +0000 Subject: Whitespace. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174009 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c34010c978..04237e7ef3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12038,17 +12038,17 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit()); - + // For MacOSX, we want to call an alternative entry point: __sincos_stret, // which returns the values in two XMM registers. 
DebugLoc dl = Op.getDebugLoc(); SDValue Arg = Op.getOperand(0); EVT ArgVT = Arg.getValueType(); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - + ArgListTy Args; ArgListEntry Entry; - + Entry.Node = Arg; Entry.Ty = ArgTy; Entry.isSExt = false; @@ -16512,8 +16512,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, // Represent the data using the same element type that is stored in // memory. In practice, we ''widen'' MemVT. - EVT WideVecVT = - EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), + EVT WideVecVT = + EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), loadRegZize/MemVT.getScalarType().getSizeInBits()); assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() && @@ -17199,8 +17199,8 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -// Helper function of PerformSETCCCombine. It is to materialize "setb reg" -// as "sbb reg,reg", since it can be extended without zext and produces +// Helper function of PerformSETCCCombine. It is to materialize "setb reg" +// as "sbb reg,reg", since it can be extended without zext and produces // an all-ones bit which is more useful than 0/1 in some cases. static SDValue MaterializeSETB(DebugLoc DL, SDValue EFLAGS, SelectionDAG &DAG) { return DAG.getNode(ISD::AND, DL, MVT::i8, @@ -17218,13 +17218,13 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG, SDValue EFLAGS = N->getOperand(1); if (CC == X86::COND_A) { - // Try to convert COND_A into COND_B in an attempt to facilitate + // Try to convert COND_A into COND_B in an attempt to facilitate // materializing "setb reg". // // Do not flip "e > c", where "c" is a constant, because Cmp instruction // cannot take an immediate as its first operand. // - if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() && + if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() && EFLAGS.getValueType().isInteger() && !isa(EFLAGS.getOperand(1))) { SDValue NewSub = DAG.getNode(X86ISD::SUB, EFLAGS.getDebugLoc(), -- cgit v1.2.3-18-g5258 From 108fb3202af6f500073cdbb7be32c25d7a273a2e Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 31 Jan 2013 20:02:54 +0000 Subject: [PEI] Pass the frame index operand number to the eliminateFrameIndex function. Each target implementation was needlessly recomputing the index. 
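As a sketch of the caller side (an assumed shape, not the exact PEI code), the generic pass can now locate the operand once and pass its index down:

    // Hypothetical caller: scan for the frame-index operand a single time.
    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
      if (!MI.getOperand(i).isFI())
        continue;
      TRI.eliminateFrameIndex(MI, SPAdj, /*FIOperandNum=*/i, RS);
      break;
    }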
Part of rdar://13076458 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174083 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86RegisterInfo.cpp | 25 ++++++++++--------------- lib/Target/X86/X86RegisterInfo.h | 3 ++- 2 files changed, 12 insertions(+), 16 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 1dd1e41af8..02da140916 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -544,20 +544,14 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); - unsigned i = 0; MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - - int FrameIndex = MI.getOperand(i).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); unsigned BasePtr; unsigned Opc = MI.getOpcode(); @@ -573,7 +567,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // This must be part of a four operand memory reference. Replace the // FrameIndex with base register with EBP. Add an offset to the offset. - MI.getOperand(i).ChangeToRegister(BasePtr, false); + MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false); // Now add the frame object offset to the offset from EBP. int FIOffset; @@ -584,17 +578,18 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } else FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex); - if (MI.getOperand(i+3).isImm()) { + if (MI.getOperand(FIOperandNum+3).isImm()) { // Offset is a 32-bit integer. - int Imm = (int)(MI.getOperand(i + 3).getImm()); + int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm()); int Offset = FIOffset + Imm; assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) && "Requesting 64-bit offset in 32-bit immediate!"); - MI.getOperand(i + 3).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset); } else { // Offset is symbolic. This is extremely rare. - uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset(); - MI.getOperand(i+3).setOffset(Offset); + uint64_t Offset = FIOffset + + (uint64_t)MI.getOperand(FIOperandNum+3).getOffset(); + MI.getOperand(FIOperandNum + 3).setOffset(Offset); } } diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 7932ede8dd..5b45e9e204 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -122,7 +122,8 @@ public: MachineBasicBlock::iterator MI) const; void eliminateFrameIndex(MachineBasicBlock::iterator MI, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; // Debug information queries. 
unsigned getFrameRegister(const MachineFunction &MF) const; -- cgit v1.2.3-18-g5258 From 693c37aa86506be657dfaf8835845b0998531c3c Mon Sep 17 00:00:00 2001 From: David Sehr Date: Fri, 1 Feb 2013 19:28:09 +0000 Subject: Two changes relevant to LEA and x32: 1) allows the use of RIP-relative addressing in 32-bit LEA instructions under x86-64 (ILP32 and LP64) 2) separates the size of address registers in 64-bit LEA instructions from control by ILP32/LP64. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174208 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrArithmetic.td | 4 ++-- lib/Target/X86/X86InstrInfo.td | 13 +++++++++++++ lib/Target/X86/X86MCInstLower.cpp | 3 ++- 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 0eecd5ffc1..7db611c3e1 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -29,11 +29,11 @@ def LEA32r : I<0x8D, MRMSrcMem, def LEA64_32r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_32mem:$src), "lea{l}\t{$src|$dst}, {$dst|$src}", - [(set GR32:$dst, lea32addr:$src)], IIC_LEA>, + [(set GR32:$dst, lea64_32addr:$src)], IIC_LEA>, Requires<[In64BitMode]>; let isReMaterializable = 1 in -def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), +def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src), "lea{q}\t{$src|$dst}, {$dst|$src}", [(set GR64:$dst, lea64addr:$src)], IIC_LEA>; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 9ecf5e25c4..0d32506a99 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -525,6 +525,13 @@ def lea64_32mem : Operand { let ParserMatchClass = X86MemAsmOperand; } +// Memory operands that use 64-bit pointers in both ILP32 and LP64. +def lea64mem : Operand { + let PrintMethod = "printi64mem"; + let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; +} + //===----------------------------------------------------------------------===// // X86 Complex Pattern Definitions. @@ -535,6 +542,12 @@ def addr : ComplexPattern; def lea32addr : ComplexPattern; +// In 64-bit mode 32-bit LEAs can use RIP-relative addressing. +def lea64_32addr : ComplexPattern; + def tls32addr : ComplexPattern; diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 5a1e1b8dc9..3af1b3e06b 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -239,7 +239,8 @@ static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) { if (!MI->getOperand(OpNo+i).isReg()) continue; unsigned Reg = MI->getOperand(OpNo+i).getReg(); - if (Reg == 0) continue; + // LEAs can use RIP-relative addressing, and RIP has no sub/super register. + if (Reg == 0 || Reg == X86::RIP) continue; MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64)); } -- cgit v1.2.3-18-g5258 From 9c02a276049cbd1d1511a88ebc7a22bb33658237 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Mon, 4 Feb 2013 07:03:24 +0000 Subject: More MSan/ASan annotations. This change lets us bootstrap LLVM/Clang under ASan and MSan. It contains fixes for 2 issues: - X86JIT reads return address from stack, which MSan does not know is initialized. - bugpoint tests run binaries with RLIMIT_AS. This does not work with certain Sanitizers. We are no longer including config.h in Compiler.h with this change. 
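A sketch of how such annotations are typically guarded so that non-MSan builds compile them away; the exact guard is an assumption, not necessarily what Compiler.h ended up with:

    #ifndef __has_feature
    # define __has_feature(x) 0             // shim for compilers without it
    #endif
    #if __has_feature(memory_sanitizer)
    # include <sanitizer/msan_interface.h>  // declares __msan_unpoison
    #else
    # define __msan_unpoison(ptr, size)     // no-op when not under MSan
    #endif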
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174306 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86JITInfo.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index cca391f647..0a4accee99 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -351,6 +351,9 @@ static void LLVM_ATTRIBUTE_USED X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { intptr_t *RetAddrLoc = &StackPtr[1]; + // We are reading raw stack data here. Tell MemorySanitizer that it is + // sufficiently initialized. + __msan_unpoison(RetAddrLoc, sizeof(*RetAddrLoc)); assert(*RetAddrLoc == RetAddr && "Could not find return address on the stack!"); -- cgit v1.2.3-18-g5258 From 9fa9251bbad191feb52273b0f3d44531a5fd78a0 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 4 Feb 2013 15:19:25 +0000 Subject: X86: Simplify code. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174326 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 04237e7ef3..5b3f06689f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -11467,7 +11467,6 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); - LLVMContext *Context = DAG.getContext(); if (!Subtarget->hasSSE2()) return SDValue(); @@ -11587,14 +11586,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { Op = DAG.getNode(X86ISD::VSHLI, dl, VT, Op.getOperand(1), DAG.getConstant(23, MVT::i32)); - const uint32_t CV[] = { 0x3f800000U, 0x3f800000U, 0x3f800000U, 0x3f800000U}; - Constant *C = ConstantDataVector::get(*Context, CV); - SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); - SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, 16); - - Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend); + Op = DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getConstant(0x3f800000U, VT)); Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op); Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op); return DAG.getNode(ISD::MUL, dl, VT, Op, R); @@ -15369,13 +15361,9 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD && isSplatVector(CondRHS.getNode()) && isSplatVector(OpRHS.getNode())) { APInt A = cast(OpRHS.getOperand(0))->getAPIntValue(); - if (CondRHS.getConstantOperandVal(0) == -A-1) { - SmallVector V(VT.getVectorNumElements(), - DAG.getConstant(-A, VT.getScalarType())); + if (CondRHS.getConstantOperandVal(0) == -A-1) return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, - DAG.getNode(ISD::BUILD_VECTOR, DL, VT, - V.data(), V.size())); - } + DAG.getConstant(-A, VT)); } // Another special case: If C was a sign bit, the sub has been @@ -16582,10 +16570,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, // Build the arithmetic shift. 
unsigned Amt = RegVT.getVectorElementType().getSizeInBits() - MemVT.getVectorElementType().getSizeInBits(); - SmallVector C(NumElems, - DAG.getConstant(Amt, RegVT.getScalarType())); - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, RegVT, &C[0], C.size()); - Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff, BV); + Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff, + DAG.getConstant(Amt, RegVT)); return DCI.CombineTo(N, Shuff, TF, true); } -- cgit v1.2.3-18-g5258 From a220aeb58f30a6b3fc991883e8107d5e1198c53b Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 4 Feb 2013 15:19:33 +0000 Subject: X86: Open up some opportunities for constant folding by postponing shift lowering. Fixes PR15141. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174327 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5b3f06689f..496202380a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -11583,8 +11583,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { // Lower SHL with variable shift amount. if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) { - Op = DAG.getNode(X86ISD::VSHLI, dl, VT, Op.getOperand(1), - DAG.getConstant(23, MVT::i32)); + Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT)); Op = DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getConstant(0x3f800000U, VT)); Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op); @@ -11595,8 +11594,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq."); // a = a << 5; - Op = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, Op.getOperand(1), - DAG.getConstant(5, MVT::i32)); + Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(5, VT)); Op = DAG.getNode(ISD::BITCAST, dl, VT, Op); // Turn 'a' into a mask suitable for VSELECT -- cgit v1.2.3-18-g5258 From d07c2a5fa1fd3bee45104953714a128cf5e19158 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Tue, 5 Feb 2013 16:53:11 +0000 Subject: Fix comments git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174390 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86CodeEmitter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index bc77334347..ece38aa346 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -124,7 +124,7 @@ template } // end anonymous namespace. /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code -/// to the specified templated MachineCodeEmitter object. +/// to the specified JITCodeEmitter object. FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM, JITCodeEmitter &JCE) { return new Emitter(TM, JCE); -- cgit v1.2.3-18-g5258 From c3afc760e1a49f29634b7442a3d38bc88a1f113e Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 5 Feb 2013 17:59:48 +0000 Subject: Move MRI liveouts to X86 return instructions. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174402 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 14 ++++++++++---- lib/Target/X86/X86ISelLowering.cpp | 10 ++-------- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 5facb7be57..b6c15123ae 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -738,6 +738,9 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { if (F.isVarArg()) return false; + // Build a list of return value registers. + SmallVector RetRegs; + if (Ret->getNumOperands() > 0) { SmallVector Outs; GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); @@ -805,8 +808,8 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg); - // Mark the register as live out of the function. - MRI.addLiveOut(VA.getLocReg()); + // Add register to return instruction. + RetRegs.push_back(VA.getLocReg()); } // The x86-64 ABI for returning structs by value requires that we copy @@ -819,11 +822,14 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { "SRetReturnReg should have been set in LowerFormalArguments()!"); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), X86::RAX).addReg(Reg); - MRI.addLiveOut(X86::RAX); + RetRegs.push_back(X86::RAX); } // Now emit the RET. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET)); + MachineInstrBuilder MIB = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET)); + for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) + MIB.addReg(RetRegs[i], RegState::Implicit); return true; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 496202380a..c24d41b685 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1590,14 +1590,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_X86); - // Add the regs to the liveout set for the function. - MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) - MRI.addLiveOut(RVLocs[i].getLocReg()); - SDValue Flag; - SmallVector RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) // Operand #1 = Bytes To Pop @@ -1666,6 +1659,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag); Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } // The x86-64 ABIs require that for returning structs by value we copy @@ -1686,7 +1680,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, Flag = Chain.getValue(1); // RAX/EAX now acts like a return value. - MRI.addLiveOut(RetValReg); + RetOps.push_back(DAG.getRegister(RetValReg, MVT::i64)); } RetOps[0] = Chain; // Update chain. -- cgit v1.2.3-18-g5258 From 2a1b60d791522d73be91d4281c90d25bd5e3d117 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Tue, 5 Feb 2013 21:53:29 +0000 Subject: Make sure the correct opcodes are used to SUB and ADD the stack pointer in function prologs/epilogs. The opcodes should depend on the data model (LP64 vs. ILP32) rather than the architecture bit-ness. 
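To make the data-model distinction concrete, a minimal sketch (hypothetical helper; the opcode enums and isInt<8> are the real ones the diff below uses): on x32 - ILP32 on x86-64 - the architecture is 64-bit, but the stack pointer adjustment follows the 32-bit data model:

// Hypothetical sketch: key the ADD-immediate opcode off LP64 vs. ILP32,
// not off whether the target architecture is 64-bit.
static unsigned pickADDriOpcode(bool IsLP64, int64_t Imm) {
  if (IsLP64) // 64-bit pointers: 64-bit stack adjustment
    return isInt<8>(Imm) ? X86::ADD64ri8 : X86::ADD64ri32;
  // ILP32 (including x32): 32-bit stack adjustment
  return isInt<8>(Imm) ? X86::ADD32ri8 : X86::ADD32ri;
}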
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174446 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 420aeb85d3..09019617f5 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -55,8 +55,8 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const { MMI.callsUnwindInit() || MMI.callsEHReturn()); } -static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) { - if (is64Bit) { +static unsigned getSUBriOpcode(unsigned isLP64, int64_t Imm) { + if (isLP64) { if (isInt<8>(Imm)) return X86::SUB64ri8; return X86::SUB64ri32; @@ -67,8 +67,8 @@ static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) { } } -static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) { - if (is64Bit) { +static unsigned getADDriOpcode(unsigned isLP64, int64_t Imm) { + if (isLP64) { if (isInt<8>(Imm)) return X86::ADD64ri8; return X86::ADD64ri32; @@ -145,7 +145,7 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, static void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, unsigned StackPtr, int64_t NumBytes, - bool Is64Bit, bool UseLEA, + bool Is64Bit, bool IsLP64, bool UseLEA, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) { bool isSub = NumBytes < 0; uint64_t Offset = isSub ? -NumBytes : NumBytes; @@ -154,8 +154,8 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, Opc = getLEArOpcode(Is64Bit); else Opc = isSub - ? getSUBriOpcode(Is64Bit, Offset) - : getADDriOpcode(Is64Bit, Offset); + ? getSUBriOpcode(IsLP64, Offset) + : getADDriOpcode(IsLP64, Offset); uint64_t Chunk = (1LL << 31) - 1; DebugLoc DL = MBB.findDebugLoc(MBBI); @@ -660,6 +660,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. bool HasFP = hasFP(MF); bool Is64Bit = STI.is64Bit(); + bool IsLP64 = STI.isTarget64BitLP64(); bool IsWin64 = STI.isTargetWin64(); bool UseLEA = STI.useLeaForSP(); unsigned StackAlign = getStackAlignment(); @@ -711,7 +712,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { if (TailCallReturnAddrDelta < 0) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, - TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)), + TII.get(getSUBriOpcode(IsLP64, -TailCallReturnAddrDelta)), StackPtr) .addReg(StackPtr) .addImm(-TailCallReturnAddrDelta) @@ -927,7 +928,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // MSVC x64's __chkstk needs to adjust %rsp. // FIXME: %rax preserves the offset and should be available. if (isSPUpdateNeeded) - emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, + emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64, UseLEA, TII, *RegInfo); if (isEAXAlive) { @@ -939,7 +940,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { MBB.insert(MBBI, MI); } } else if (NumBytes) - emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, + emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64, UseLEA, TII, *RegInfo); // If we need a base pointer, set it up here. 
It's whatever the value @@ -996,6 +997,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, unsigned RetOpcode = MBBI->getOpcode(); DebugLoc DL = MBBI->getDebugLoc(); bool Is64Bit = STI.is64Bit(); + bool IsLP64 = STI.isTarget64BitLP64(); bool UseLEA = STI.useLeaForSP(); unsigned StackAlign = getStackAlignment(); unsigned SlotSize = RegInfo->getSlotSize(); @@ -1091,7 +1093,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } } else if (NumBytes) { // Adjust stack pointer back: ESP += numbytes. - emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII, *RegInfo); + emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, IsLP64, UseLEA, + TII, *RegInfo); } // We're returning from function via eh_return. @@ -1126,7 +1129,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, if (Offset) { // Check for possible merge with preceding ADD instruction. Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true); - emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, UseLEA, TII, *RegInfo); + emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, IsLP64, + UseLEA, TII, *RegInfo); } // Jump to label or value in register. @@ -1169,7 +1173,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // Check for possible merge with preceding ADD instruction. delta += mergeSPUpdates(MBB, MBBI, StackPtr, true); - emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, UseLEA, TII, *RegInfo); + emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, IsLP64, UseLEA, TII, + *RegInfo); } } -- cgit v1.2.3-18-g5258 From 16221a60a00e52b078f6164ba4475c6e8e918e4b Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Wed, 6 Feb 2013 20:43:57 +0000 Subject: This is a follow-up on r174446, now taking Atom processors into account. Atoms use LEA for updating SP in prologs/epilogs, and the exact LEA opcode depends on the data model. Also reapplying the test case which was added and then reverted (because of Atom failures), this time explicitly specifying the CPU in addition to the triple. The test case now checks all variations (data model, CPU Atom vs. Core).
getSUBriOpcode(IsLP64, Offset) @@ -1083,7 +1083,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, if (RegInfo->needsStackRealignment(MF)) MBBI = FirstCSPop; if (CSSize != 0) { - unsigned Opc = getLEArOpcode(Is64Bit); + unsigned Opc = getLEArOpcode(IsLP64); addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), FramePtr, false, -CSSize); } else { -- cgit v1.2.3-18-g5258 From 86494d7991050b4ffbfdcf1d15e1ad5e3c28f07b Mon Sep 17 00:00:00 2001 From: Kay Tiong Khoo Date: Mon, 11 Feb 2013 19:46:36 +0000 Subject: * Fixed disassembly of some i386 system instructions with Intel syntax. * Added a file of test cases for i386 Intel syntax. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174900 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSystem.td | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index ea716bfd6b..3caa1b538c 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -352,11 +352,11 @@ def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), // Descriptor-table support instructions def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), - "sgdtw\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>; + "sgdt{w}\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>; def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), "sgdt\t$dst", [], IIC_SGDT>, TB; def SIDT16m : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins), - "sidtw\t$dst", [], IIC_SIDT>, TB, OpSize, Requires<[In32BitMode]>; + "sidt{w}\t$dst", [], IIC_SIDT>, TB, OpSize, Requires<[In32BitMode]>; def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins), "sidt\t$dst", []>, TB; def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins), @@ -374,11 +374,11 @@ def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins), "sldt{q}\t$dst", [], IIC_SLDT>, TB; def LGDT16m : I<0x01, MRM2m, (outs), (ins opaque48mem:$src), - "lgdtw\t$src", [], IIC_LGDT>, TB, OpSize, Requires<[In32BitMode]>; + "lgdt{w}\t$src", [], IIC_LGDT>, TB, OpSize, Requires<[In32BitMode]>; def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src), "lgdt\t$src", [], IIC_LGDT>, TB; def LIDT16m : I<0x01, MRM3m, (outs), (ins opaque48mem:$src), - "lidtw\t$src", [], IIC_LIDT>, TB, OpSize, Requires<[In32BitMode]>; + "lidt{w}\t$src", [], IIC_LIDT>, TB, OpSize, Requires<[In32BitMode]>; def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src), "lidt\t$src", [], IIC_LIDT>, TB; def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src), -- cgit v1.2.3-18-g5258 From 6c3daabc3ee51a8fcb804e0f110f01e59e0e6d61 Mon Sep 17 00:00:00 2001 From: Kay Tiong Khoo Date: Tue, 12 Feb 2013 00:19:12 +0000 Subject: Added 0x0D to the 2-byte opcode extension table for the prefetch* variants. Fixed decode of the existing 3DNow! prefetchw instruction. Intel is scheduled to add a compatible prefetchw (same encoding) to future CPUs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174920 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86Instr3DNow.td | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td index 54b91c3edb..bb362f5c7b 100644 --- a/lib/Target/X86/X86Instr3DNow.td +++ b/lib/Target/X86/X86Instr3DNow.td @@ -87,12 +87,10 @@ defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">; def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>; def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins
i32mem:$addr), - "prefetch $addr", []>; + "prefetch\t$addr", []>; -// FIXME: Diassembler gets a bogus decode conflict. -let isAsmParserOnly = 1 in def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr), - "prefetchw $addr", []>; + "prefetchw\t$addr", []>; // "3DNowA" instructions defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">; -- cgit v1.2.3-18-g5258 From 23571f4f2c895d60c9ed23b831f988b49a55478e Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 13 Feb 2013 06:01:05 +0000 Subject: Check i1 as well as i8 variables for 8-bit registers for x86 inline assembly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175036 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c24d41b685..bc29df4981 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -18135,7 +18135,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // really want an 8-bit or 32-bit register, map to the appropriate register // class and return the appropriate register. if (Res.second == &X86::GR16RegClass) { - if (VT == MVT::i8) { + if (VT == MVT::i8 || VT == MVT::i1) { unsigned DestReg = 0; switch (Res.first) { default: break; -- cgit v1.2.3-18-g5258 From d29804f80d1cc26ea552b58693ce883f5b13de7a Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Wed, 13 Feb 2013 08:02:04 +0000 Subject: Prevent insertion of "vzeroupper" before a call that preserves YMM registers, since the caller uses the preserved registers across the call. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175043 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86VZeroUpper.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index c4a58874a4..0f77948c0e 100644 --- a/lib/Target/X86/X86VZeroUpper.cpp +++ b/lib/Target/X86/X86VZeroUpper.cpp @@ -120,9 +120,19 @@ static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) { return false; } +static bool clobbersAllYmmRegs(const MachineOperand &MO) { + for (unsigned reg = X86::YMM0; reg < X86::YMM15; ++reg) { + if (!MO.clobbersPhysReg(reg)) + return false; + } + return true; +} + static bool hasYmmReg(MachineInstr *MI) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); + if (MI->isCall() && MO.isRegMask() && !clobbersAllYmmRegs(MO)) + return true; if (!MO.isReg()) continue; if (MO.isDebug()) -- cgit v1.2.3-18-g5258 From f09e02f01a817f4daf95ef8d3f1f2545297d32e7 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 13 Feb 2013 13:40:35 +0000 Subject: X86: Disable generation of rep;movsl when %esi is used as a base pointer. This happens when there is both stack realignment and a dynamic alloca in the function. If we overwrite %esi (rep;movsl uses fixed registers) we'll lose the base pointer and the next register spill will write into oblivion. Fixes PR15249 and unbreaks Firefox on i386/FreeBSD. Mozilla uses dynamic allocas and FreeBSD a 4-byte stack alignment.
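As a hedged illustration of the resulting guard (a sketch of the idea; hasBasePointer and getBaseRegister are the real queries used in the diff below): rep;movsl hard-codes ESI (source), EDI (destination) and ECX (count), so the lowering must refuse when ESI doubles as the base pointer:

// Hypothetical sketch: is the fixed-register rep;movs lowering safe here?
static bool canUseRepMovs(const X86RegisterInfo &TRI,
                          const MachineFunction &MF) {
  return !(TRI.hasBasePointer(MF) && TRI.getBaseRegister() == X86::ESI);
}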
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175057 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86SelectionDAGInfo.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index 757e8c70a4..f934fdd859 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -202,6 +202,14 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SrcPtrInfo.getAddrSpace() >= 256) return SDValue(); + // ESI might be used as a base pointer, in that case we can't simply overwrite + // the register. Fall back to generic code. + const X86RegisterInfo *TRI = + static_cast(DAG.getTarget().getRegisterInfo()); + if (TRI->hasBasePointer(DAG.getMachineFunction()) && + TRI->getBaseRegister() == X86::ESI) + return SDValue(); + MVT AVT; if (Align & 1) AVT = MVT::i8; -- cgit v1.2.3-18-g5258 From 7b0bc3fe3e8210ec7fffb311443a07d81cbe1308 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 13 Feb 2013 21:33:44 +0000 Subject: [ms-inline-asm] Add support for memory references that have non-immediate displacements. rdar://12974533 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175083 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86AsmPrinter.cpp | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 75fa9d2074..ac5daec2b2 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -252,14 +252,15 @@ void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, } case MachineOperand::MO_Immediate: - O << '$' << MO.getImm(); + if (AsmVariant == 0) O << '$'; + O << MO.getImm(); return; case MachineOperand::MO_JumpTableIndex: case MachineOperand::MO_ConstantPoolIndex: case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_ExternalSymbol: { - O << '$'; + if (AsmVariant == 0) O << '$'; printSymbolOperand(MO, O); break; } @@ -355,19 +356,23 @@ void X86AsmPrinter::printIntelMemReference(const MachineInstr *MI, unsigned Op, NeedPlus = true; } - assert (DispSpec.isImm() && "Displacement is not an immediate!"); - int64_t DispVal = DispSpec.getImm(); - if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) { - if (NeedPlus) { - if (DispVal > 0) - O << " + "; - else { - O << " - "; - DispVal = -DispVal; + if (!DispSpec.isImm()) { + if (NeedPlus) O << " + "; + printOperand(MI, Op+3, O, Modifier, AsmVariant); + } else { + int64_t DispVal = DispSpec.getImm(); + if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) { + if (NeedPlus) { + if (DispVal > 0) + O << " + "; + else { + O << " - "; + DispVal = -DispVal; + } } + O << DispVal; } - O << DispVal; - } + } O << ']'; } -- cgit v1.2.3-18-g5258 From 48aaf5fd02bbdcde0935929f0c5bf08b1f8f20b8 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Wed, 13 Feb 2013 21:59:15 +0000 Subject: Don't build tail calls to functions with three inreg arguments on x86-32 PIC. Fixes PR15250! 
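A hypothetical source-level illustration (not from the patch): with regparm(3), the three arguments occupy EAX, EDX and ECX, and under -fPIC no caller-clobbered register remains to materialize the callee's address, so the call below cannot become a sibcall on x86-32:

// Hypothetical example: three register arguments plus PIC leave no scratch
// register for the tail-call target address.
__attribute__((regparm(3))) int callee(int a, int b, int c);

__attribute__((regparm(3))) int caller(int a, int b, int c) {
  return callee(a, b, c); // stays a normal call on x86-32 PIC
}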
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175092 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index bc29df4981..55de3d3c98 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2814,7 +2814,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, - SelectionDAG& DAG) const { + SelectionDAG &DAG) const { if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C) return false; @@ -2853,7 +2853,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // An stdcall caller is expected to clean up its arguments; the callee // isn't going to do that. - if (!CCMatch && CallerCC==CallingConv::X86_StdCall) + if (!CCMatch && CallerCC == CallingConv::X86_StdCall) return false; // Do not sibcall optimize vararg calls unless all arguments are passed via @@ -2973,9 +2973,15 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // callee-saved registers are restored. These happen to be the same // registers used to pass 'inreg' arguments so watch out for those. if (!Subtarget->is64Bit() && - !isa(Callee) && - !isa(Callee)) { + ((!isa(Callee) && + !isa(Callee)) || + getTargetMachine().getRelocationModel() == Reloc::PIC_)) { unsigned NumInRegs = 0; + // In PIC we need an extra register to formulate the address computation + // for the callee. + unsigned MaxInRegs = + (getTargetMachine().getRelocationModel() == Reloc::PIC_) ? 2 : 3; + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; if (!VA.isRegLoc()) @@ -2984,7 +2990,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, switch (Reg) { default: break; case X86::EAX: case X86::EDX: case X86::ECX: - if (++NumInRegs == 3) + if (++NumInRegs == MaxInRegs) return false; break; } -- cgit v1.2.3-18-g5258 From ad236eb8c6fec966463fd7186b582284e1dcdb1b Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 14 Feb 2013 02:49:18 +0000 Subject: Don't assume the mangling of static functions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175121 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86JITInfo.cpp | 6 ------ 1 file changed, 6 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 0a4accee99..2e003f7a46 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -342,12 +342,6 @@ extern "C" { /// must locate the start of the stub or call site and pass it into the JIT /// compiler function. extern "C" { -#if !(defined (X86_64_JIT) && defined(_MSC_VER)) - // the following function is called only from this translation unit, - // unless we are under 64bit Windows with MSC, where there is - // no support for inline assembly -static -#endif void LLVM_ATTRIBUTE_USED X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { intptr_t *RetAddrLoc = &StackPtr[1]; -- cgit v1.2.3-18-g5258 From bf7ac42663e087b2effc6b9428eddab3b2475073 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 14 Feb 2013 03:33:34 +0000 Subject: Revert r175120 and r175121. Clang is producing the expected asm names again. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175133 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86JITInfo.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 2e003f7a46..0a4accee99 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -342,6 +342,12 @@ extern "C" { /// must locate the start of the stub or call site and pass it into the JIT /// compiler function. extern "C" { +#if !(defined (X86_64_JIT) && defined(_MSC_VER)) + // the following function is called only from this translation unit, + // unless we are under 64bit Windows with MSC, where there is + // no support for inline assembly +static +#endif void LLVM_ATTRIBUTE_USED X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { intptr_t *RetAddrLoc = &StackPtr[1]; -- cgit v1.2.3-18-g5258 From 60b3e18d674489364b6448d376ff6666f106c1fc Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Thu, 14 Feb 2013 08:20:26 +0000 Subject: Fixed a bug in X86TargetLowering::LowerVectorIntExtend() (assertion failure). Added a test. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175144 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 55de3d3c98..dbc0e016f8 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6662,9 +6662,10 @@ X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } + LLVMContext *Context = DAG.getContext(); unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift; - EVT NeVT = EVT::getIntegerVT(*DAG.getContext(), NBits); - EVT NVT = EVT::getVectorVT(*DAG.getContext(), NeVT, NumElems >> Shift); + EVT NeVT = EVT::getIntegerVT(*Context, NBits); + EVT NVT = EVT::getVectorVT(*Context, NeVT, NumElems >> Shift); if (!isTypeLegal(NVT)) return SDValue(); @@ -6683,8 +6684,21 @@ X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const { // If it's foldable, i.e. normal load with single use, we will let code // selection to fold it. Otherwise, we will short the conversion sequence. if (CIdx && CIdx->getZExtValue() == 0 && - (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) + (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) { + if (V.getValueSizeInBits() > V1.getValueSizeInBits()) { + // The "ext_vec_elt" node is wider than the result node. + // In this case we should extract subvector from V. + // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast (extract_subvector x)). 
+ unsigned Ratio = V.getValueSizeInBits() / V1.getValueSizeInBits(); + EVT FullVT = V.getValueType(); + EVT SubVecVT = EVT::getVectorVT(*Context, + FullVT.getVectorElementType(), + FullVT.getVectorNumElements()/Ratio); + V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, V, + DAG.getIntPtrConstant(0)); + } V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V); + } } return DAG.getNode(ISD::BITCAST, DL, VT, -- cgit v1.2.3-18-g5258 From b39a5529496a9824831c78e9e7249c062a4b754a Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Thu, 14 Feb 2013 18:20:48 +0000 Subject: 80-col git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175189 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index dbc0e016f8..02ac8bfb89 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -17430,7 +17430,8 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG, if (In.getOpcode() != X86ISD::VZEXT) return SDValue(); - return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0), In.getOperand(0)); + return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0), + In.getOperand(0)); } SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, -- cgit v1.2.3-18-g5258 From 7b672ed380cf44894f8b96c52558dcfc136af383 Mon Sep 17 00:00:00 2001 From: Kay Tiong Khoo Date: Thu, 14 Feb 2013 19:08:21 +0000 Subject: Added basic support for Intel ADX instructions: feature flag, instruction definitions, test cases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175196 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86.td | 2 ++ lib/Target/X86/X86InstrArithmetic.td | 46 ++++++++++++++++++++++++++++++++++++ lib/Target/X86/X86InstrInfo.td | 1 + lib/Target/X86/X86Subtarget.cpp | 1 + lib/Target/X86/X86Subtarget.h | 4 ++++ 5 files changed, 54 insertions(+) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 3ab2899365..0216252c19 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -120,6 +120,8 @@ def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true", "Support BMI2 instructions">; def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true", "Support RTM instructions">; +def FeatureADX : SubtargetFeature<"adx", "HasADX", "true", + "Support ADX instructions">; def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb", diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 7db611c3e1..d86a4065a7 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -1256,3 +1256,49 @@ let Predicates = [HasBMI2] in { let Uses = [RDX] in defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem>, VEX_W; } + +//===----------------------------------------------------------------------===// +// ADCX Instruction +// +let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { + def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "adcx{l}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_NONMEM>, T8, OpSize; + + def ADCX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "adcx{q}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_NONMEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>; + + let mayLoad = 1 in { + def ADCX32rm :
I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "adcx{l}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_MEM>, T8, OpSize; + + def ADCX64rm : I<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "adcx{q}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_MEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>; + } +} + +//===----------------------------------------------------------------------===// +// ADOX Instruction +// +let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { + def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "adox{l}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_NONMEM>, T8XS; + + def ADOX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "adox{q}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_NONMEM>, T8XS, REX_W, Requires<[In64BitMode]>; + + let mayLoad = 1 in { + def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "adox{l}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_MEM>, T8XS; + + def ADOX64rm : I<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "adox{q}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_MEM>, T8XS, REX_W, Requires<[In64BitMode]>; + } +} diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 0d32506a99..84c278c7de 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -603,6 +603,7 @@ def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">; def HasBMI : Predicate<"Subtarget->hasBMI()">; def HasBMI2 : Predicate<"Subtarget->hasBMI2()">; def HasRTM : Predicate<"Subtarget->hasRTM()">; +def HasADX : Predicate<"Subtarget->hasADX()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 6305501a21..b8ecf4466a 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -349,6 +349,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, , HasBMI(false) , HasBMI2(false) , HasRTM(false) + , HasADX(false) , IsBTMemSlow(false) , IsUAMemFast(false) , HasVectorUAMem(false) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index eb587a5761..1466041712 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -121,6 +121,9 @@ protected: /// HasRTM - Processor has RTM instructions. bool HasRTM; + /// HasADX - Processor has ADX instructions. + bool HasADX; + /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; @@ -242,6 +245,7 @@ public: bool hasBMI() const { return HasBMI; } bool hasBMI2() const { return HasBMI2; } bool hasRTM() const { return HasRTM; } + bool hasADX() const { return HasADX; } bool isBTMemSlow() const { return IsBTMemSlow; } bool isUnalignedMemAccessFast() const { return IsUAMemFast; } bool hasVectorUAMem() const { return HasVectorUAMem; } -- cgit v1.2.3-18-g5258 From fe9b5a4f74c5686ab479dd42bd87e654834a8ba4 Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Thu, 14 Feb 2013 21:50:09 +0000 Subject: Simplify code. Remove "else after return". 
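For context, a minimal before/after sketch (hypothetical code) of the "else after return" cleanup:

// Before: the else is redundant once the taken branch returns.
int absBefore(int x) {
  if (x < 0)
    return -x;
  else
    return x;
}

// After: early return, no else.
int absAfter(int x) {
  if (x < 0)
    return -x;
  return x;
}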
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175212 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index b6c15123ae..7f230ff571 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -326,12 +326,11 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned &ResultReg) { unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src, /*TODO: Kill=*/false); - - if (RR != 0) { - ResultReg = RR; - return true; - } else + if (RR == 0) return false; + + ResultReg = RR; + return true; } /// X86SelectAddress - Attempt to fill in an address from the given value. -- cgit v1.2.3-18-g5258 From d6f19c716378bce0acc3cbfc9dc9297468f046a0 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Thu, 14 Feb 2013 23:17:03 +0000 Subject: The operand listing is very much outdated. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175220 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 6758ed1404..958ceb0f89 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -234,11 +234,8 @@ namespace llvm { // EH_SJLJ_LONGJMP - SjLj exception handling longjmp. EH_SJLJ_LONGJMP, - /// TC_RETURN - Tail call return. - /// operand #0 chain - /// operand #1 callee (register or absolute) - /// operand #2 stack adjustment - /// operand #3 optional in flag + /// TC_RETURN - Tail call return. See X86TargetLowering::LowerCall for + /// the list of operands. TC_RETURN, // VZEXT_MOVL - Vector move low and zero extend. -- cgit v1.2.3-18-g5258 From 74b3c8da4800c7e8ba8f019879db29738ecc5f74 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 15 Feb 2013 12:30:38 +0000 Subject: Make helpers static. Add missing include so LLVMInitializeObjCARCOpts gets C linkage. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175264 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86RegisterInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 02da140916..401eefb16a 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -50,7 +50,7 @@ ForceStackAlign("force-align-stack", " needed for the function."), cl::init(false), cl::Hidden); -cl::opt +static cl::opt EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true), cl::desc("Enable use of a base pointer for complex stack frames")); -- cgit v1.2.3-18-g5258 From 9fa05f98e0e8410bc8c5e4000e0d47880f8b37c4 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 15 Feb 2013 14:08:43 +0000 Subject: Don't make assumptions about the mangling of static functions in extern "C" blocks. We still don't have consensus if we should try to change clang or the standard, but llvm should work with compilers that implement the current standard and mangle those functions. 
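To illustrate the underlying ambiguity, a hypothetical example (not from the patch):

extern "C" {
// Internal linkage: some compilers emit the plain C name "helper", others
// apply C++ mangling - the standard's wording leaves room for both, so
// nothing should assume either spelling.
static void helper() {}

// External linkage: reliably gets the unmangled C name "entry".
void entry() { helper(); }
}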
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175267 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86JITInfo.cpp | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 0a4accee99..3ae0630b60 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -79,7 +79,7 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction; # define CFI(x) #endif -// Provide a wrapper for X86CompilationCallback2 that saves non-traditional +// Provide a wrapper for LLVMX86CompilationCallback2 that saves non-traditional // callee saved registers, for the fastcc calling convention. extern "C" { #if defined(X86_64_JIT) @@ -131,12 +131,12 @@ extern "C" { "subq $32, %rsp\n" "movq %rbp, %rcx\n" // Pass prev frame and return address "movq 8(%rbp), %rdx\n" - "call " ASMPREFIX "X86CompilationCallback2\n" + "call " ASMPREFIX "LLVMX86CompilationCallback2\n" "addq $32, %rsp\n" #else "movq %rbp, %rdi\n" // Pass prev frame and return address "movq 8(%rbp), %rsi\n" - "call " ASMPREFIX "X86CompilationCallback2\n" + "call " ASMPREFIX "LLVMX86CompilationCallback2\n" #endif // Restore all XMM arg registers "movaps 112(%rsp), %xmm7\n" @@ -213,7 +213,7 @@ extern "C" { "movl 4(%ebp), %eax\n" // Pass prev frame and return address "movl %eax, 4(%esp)\n" "movl %ebp, (%esp)\n" - "call " ASMPREFIX "X86CompilationCallback2\n" + "call " ASMPREFIX "LLVMX86CompilationCallback2\n" "movl %ebp, %esp\n" // Restore ESP CFI(".cfi_def_cfa_register %esp\n") "subl $12, %esp\n" @@ -269,7 +269,7 @@ extern "C" { "movl 4(%ebp), %eax\n" // Pass prev frame and return address "movl %eax, 4(%esp)\n" "movl %ebp, (%esp)\n" - "call " ASMPREFIX "X86CompilationCallback2\n" + "call " ASMPREFIX "LLVMX86CompilationCallback2\n" "addl $16, %esp\n" "movaps 48(%esp), %xmm3\n" CFI(".cfi_restore %xmm3\n") @@ -300,10 +300,7 @@ extern "C" { SIZE(X86CompilationCallback_SSE) ); # else - // the following function is called only from this translation unit, - // unless we are under 64bit Windows with MSC, where there is - // no support for inline assembly - static void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr); + void LLVMX86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr); _declspec(naked) void X86CompilationCallback(void) { __asm { @@ -317,7 +314,7 @@ extern "C" { mov eax, dword ptr [ebp+4] mov dword ptr [esp+4], eax mov dword ptr [esp], ebp - call X86CompilationCallback2 + call LLVMX86CompilationCallback2 mov esp, ebp sub esp, 12 pop ecx @@ -337,19 +334,12 @@ extern "C" { #endif } -/// X86CompilationCallback2 - This is the target-specific function invoked by the +/// This is the target-specific function invoked by the /// function stub when we did not know the real target of a call. This function /// must locate the start of the stub or call site and pass it into the JIT /// compiler function. extern "C" { -#if !(defined (X86_64_JIT) && defined(_MSC_VER)) - // the following function is called only from this translation unit, - // unless we are under 64bit Windows with MSC, where there is - // no support for inline assembly -static -#endif -void LLVM_ATTRIBUTE_USED -X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { +void LLVMX86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { intptr_t *RetAddrLoc = &StackPtr[1]; // We are reading raw stack data here. Tell MemorySanitizer that it is // sufficiently initialized. 
@@ -520,7 +510,7 @@ void *X86JITInfo::emitFunctionStub(const Function* F, void *Target, // This used to use 0xCD, but that value is used by JITMemoryManager to // initialize the buffer with garbage, which means it may follow a - // noreturn function call, confusing X86CompilationCallback2. PR 4929. + // noreturn function call, confusing LLVMX86CompilationCallback2. PR 4929. JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub! return Result; } -- cgit v1.2.3-18-g5258 From 8a8a2dcae054a7b4dfea360b9b88e6be53fda40f Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 15 Feb 2013 14:15:59 +0000 Subject: Give these callbacks hidden visibility. It is better not to export them more than we need to, and some ELF linkers complain about directly accessing symbols with default visibility. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175268 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86JITInfo.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 3ae0630b60..aeb05ccdf8 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -339,7 +339,8 @@ extern "C" { /// must locate the start of the stub or call site and pass it into the JIT /// compiler function. extern "C" { -void LLVMX86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { +void LLVM_LIBRARY_VISIBILITY LLVMX86CompilationCallback2(intptr_t *StackPtr, + intptr_t RetAddr) { intptr_t *RetAddrLoc = &StackPtr[1]; // We are reading raw stack data here. Tell MemorySanitizer that it is // sufficiently initialized. -- cgit v1.2.3-18-g5258 From 4fb25b7d799ea27a2c98a4d9bcf7469cc685db47 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 15 Feb 2013 21:58:13 +0000 Subject: [ms-inline asm] Adjust the EndLoc to account for the ']'. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175312 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index dc15a11537..8c4c447df9 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -911,7 +911,8 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, if (getLexer().isNot(AsmToken::RBrac)) return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!"); - End = Parser.getTok().getEndLoc(); + // Adjust the EndLoc due to the ']'. + End = SMLoc::getFromPointer(Parser.getTok().getEndLoc().getPointer()-1); Parser.Lex(); return X86Operand::CreateMem(Disp, Start, End, Size); } -- cgit v1.2.3-18-g5258 From 789cb5df9ca61f8a3794a4fbde7cc020fd00a02a Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 15 Feb 2013 22:31:27 +0000 Subject: Use the 'target-features' and 'target-cpu' attributes to reset the subtarget features. If two functions require different features (e.g., `-mno-sse' vs. `-msse') then we want to honor that, especially during LTO. We can do that by resetting the subtarget's features depending upon the 'target-features' attribute.
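A minimal sketch of the mechanism (hypothetical helper; the attribute keys and the AttributeSet calls mirror the diff below):

// Hypothetical sketch: re-derive subtarget features from the function's
// "target-cpu"/"target-features" string attributes, e.g. "+sse,-avx".
static void applyFunctionFeatures(X86Subtarget &ST, const Function &F) {
  AttributeSet Attrs = F.getAttributes();
  std::string CPU = Attrs.getAttribute(AttributeSet::FunctionIndex,
                                       "target-cpu").getValueAsString();
  std::string FS = Attrs.getAttribute(AttributeSet::FunctionIndex,
                                      "target-features").getValueAsString();
  if (!FS.empty())
    ST.resetSubtargetFeatures(CPU, FS); // reapply for this function
}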
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175314 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86Subtarget.cpp | 100 ++++++++++++++++++++++++---------------- lib/Target/X86/X86Subtarget.h | 8 +++- 2 files changed, 67 insertions(+), 41 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index b8ecf4466a..58e0d06dfb 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -14,6 +14,8 @@ #define DEBUG_TYPE "subtarget" #include "X86Subtarget.h" #include "X86InstrInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -324,46 +326,21 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { } } -X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, - unsigned StackAlignOverride, bool is64Bit) - : X86GenSubtargetInfo(TT, CPU, FS) - , X86ProcFamily(Others) - , PICStyle(PICStyles::None) - , X86SSELevel(NoMMXSSE) - , X863DNowLevel(NoThreeDNow) - , HasCMov(false) - , HasX86_64(false) - , HasPOPCNT(false) - , HasSSE4A(false) - , HasAES(false) - , HasPCLMUL(false) - , HasFMA(false) - , HasFMA4(false) - , HasXOP(false) - , HasMOVBE(false) - , HasRDRAND(false) - , HasF16C(false) - , HasFSGSBase(false) - , HasLZCNT(false) - , HasBMI(false) - , HasBMI2(false) - , HasRTM(false) - , HasADX(false) - , IsBTMemSlow(false) - , IsUAMemFast(false) - , HasVectorUAMem(false) - , HasCmpxchg16b(false) - , UseLeaForSP(false) - , HasSlowDivide(false) - , PostRAScheduler(false) - , PadShortFunctions(false) - , stackAlignment(4) - // FIXME: this is a known good value for Yonah. How about others? - , MaxInlineSizeThreshold(128) - , TargetTriple(TT) - , In64BitMode(is64Bit) { - // Determine default and user specified characteristics +void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) { + AttributeSet FnAttrs = MF->getFunction()->getAttributes(); + Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, + "target-cpu"); + Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, + "target-features"); + std::string CPU = + !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; + std::string FS = + !FSAttr.hasAttribute(Attribute::None) ? 
FSAttr.getValueAsString() : ""; + if (!FS.empty()) + resetSubtargetFeatures(CPU, FS); +} + +void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { std::string CPUName = CPU; if (!FS.empty() || !CPU.empty()) { if (CPUName.empty()) { @@ -440,6 +417,49 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, stackAlignment = 16; } +X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, + unsigned StackAlignOverride, bool is64Bit) + : X86GenSubtargetInfo(TT, CPU, FS) + , X86ProcFamily(Others) + , PICStyle(PICStyles::None) + , X86SSELevel(NoMMXSSE) + , X863DNowLevel(NoThreeDNow) + , HasCMov(false) + , HasX86_64(false) + , HasPOPCNT(false) + , HasSSE4A(false) + , HasAES(false) + , HasPCLMUL(false) + , HasFMA(false) + , HasFMA4(false) + , HasXOP(false) + , HasMOVBE(false) + , HasRDRAND(false) + , HasF16C(false) + , HasFSGSBase(false) + , HasLZCNT(false) + , HasBMI(false) + , HasBMI2(false) + , HasRTM(false) + , HasADX(false) + , IsBTMemSlow(false) + , IsUAMemFast(false) + , HasVectorUAMem(false) + , HasCmpxchg16b(false) + , UseLeaForSP(false) + , HasSlowDivide(false) + , PostRAScheduler(false) + , PadShortFunctions(false) + , stackAlignment(4) + // FIXME: this is a known good value for Yonah. How about others? + , MaxInlineSizeThreshold(128) + , TargetTriple(TT) + , StackAlignOverride(StackAlignOverride) + , In64BitMode(is64Bit) { + resetSubtargetFeatures(CPU, FS); +} + bool X86Subtarget::enablePostRAScheduler( CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 1466041712..d1c706725d 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -168,11 +168,13 @@ protected: InstrItineraryData InstrItins; private: + /// StackAlignOverride - Override the stack alignment. + unsigned StackAlignOverride; + /// In64BitMode - True if compiling for 64-bit, false for 32-bit. bool In64BitMode; public: - /// This constructor initializes the data members to match that /// of the specified triple. /// @@ -197,6 +199,10 @@ public: /// instruction. void AutoDetectSubtargetFeatures(); + /// \brief Reset the features for the X86 target. + virtual void resetSubtargetFeatures(const MachineFunction *MF); + void resetSubtargetFeatures(StringRef CPU, StringRef FS); + /// Is this x86_64? (disregarding specific ABI / programming model) bool is64Bit() const { return In64BitMode; -- cgit v1.2.3-18-g5258 From 9be8b4fc92e1ace819a78db512c1f945c1471be7 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 15 Feb 2013 23:18:01 +0000 Subject: Reinitialize the ivars in the subtarget. When we're recalculating the feature set of the subtarget, we need to have the ivars in their initial state. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175320 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86Subtarget.cpp | 72 ++++++++++++++++++++++------------------- lib/Target/X86/X86Subtarget.h | 4 ++- 2 files changed, 42 insertions(+), 34 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 58e0d06dfb..6391acfa80 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -336,8 +336,10 @@ void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) { !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; std::string FS = !FSAttr.hasAttribute(Attribute::None) ? 
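A compact sketch of the intended sequencing (hypothetical member function; initializeEnvironment and resetSubtargetFeatures appear in the diff below):

// Hypothetical sketch: reset every feature ivar to its default before
// reparsing, so features enabled for one function do not leak into the next.
void X86Subtarget::recomputeFeatures(StringRef CPU, StringRef FS) {
  initializeEnvironment();         // ivars back to their initial state
  resetSubtargetFeatures(CPU, FS); // then apply this function's features
}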
FSAttr.getValueAsString() : ""; - if (!FS.empty()) + if (!FS.empty()) { + initializeEnvironment(); resetSubtargetFeatures(CPU, FS); + } } void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { @@ -417,46 +419,50 @@ void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { stackAlignment = 16; } +void X86Subtarget::initializeEnvironment() { + PICStyle = PICStyles::None; + X86SSELevel = NoMMXSSE; + X863DNowLevel = NoThreeDNow; + HasCMov = false; + HasX86_64 = false; + HasPOPCNT = false; + HasSSE4A = false; + HasAES = false; + HasPCLMUL = false; + HasFMA = false; + HasFMA4 = false; + HasXOP = false; + HasMOVBE = false; + HasRDRAND = false; + HasF16C = false; + HasFSGSBase = false; + HasLZCNT = false; + HasBMI = false; + HasBMI2 = false; + HasRTM = false; + HasADX = false; + IsBTMemSlow = false; + IsUAMemFast = false; + HasVectorUAMem = false; + HasCmpxchg16b = false; + UseLeaForSP = false; + HasSlowDivide = false; + PostRAScheduler = false; + PadShortFunctions = false; + stackAlignment = 4; + // FIXME: this is a known good value for Yonah. How about others? + MaxInlineSizeThreshold = 128; +} + X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, const std::string &FS, unsigned StackAlignOverride, bool is64Bit) : X86GenSubtargetInfo(TT, CPU, FS) , X86ProcFamily(Others) - , PICStyle(PICStyles::None) - , X86SSELevel(NoMMXSSE) - , X863DNowLevel(NoThreeDNow) - , HasCMov(false) - , HasX86_64(false) - , HasPOPCNT(false) - , HasSSE4A(false) - , HasAES(false) - , HasPCLMUL(false) - , HasFMA(false) - , HasFMA4(false) - , HasXOP(false) - , HasMOVBE(false) - , HasRDRAND(false) - , HasF16C(false) - , HasFSGSBase(false) - , HasLZCNT(false) - , HasBMI(false) - , HasBMI2(false) - , HasRTM(false) - , HasADX(false) - , IsBTMemSlow(false) - , IsUAMemFast(false) - , HasVectorUAMem(false) - , HasCmpxchg16b(false) - , UseLeaForSP(false) - , HasSlowDivide(false) - , PostRAScheduler(false) - , PadShortFunctions(false) - , stackAlignment(4) - // FIXME: this is a known good value for Yonah. How about others? - , MaxInlineSizeThreshold(128) , TargetTriple(TT) , StackAlignOverride(StackAlignOverride) , In64BitMode(is64Bit) { + initializeEnvironment(); resetSubtargetFeatures(CPU, FS); } diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index d1c706725d..e97da4b6f4 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -201,8 +201,10 @@ public: /// \brief Reset the features for the X86 target. virtual void resetSubtargetFeatures(const MachineFunction *MF); +private: + void initializeEnvironment(); void resetSubtargetFeatures(StringRef CPU, StringRef FS); - +public: /// Is this x86_64? (disregarding specific ABI / programming model) bool is64Bit() const { return In64BitMode; -- cgit v1.2.3-18-g5258 From ba6867d0ce3de9b7b4385f98d215edfcd36c4b32 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 15 Feb 2013 23:22:32 +0000 Subject: Temporary revert of 175320. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175322 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86Subtarget.cpp | 72 +++++++++++++++++++---------------------- lib/Target/X86/X86Subtarget.h | 4 +-- 2 files changed, 34 insertions(+), 42 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 6391acfa80..58e0d06dfb 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -336,10 +336,8 @@ void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) { !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; std::string FS = !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : ""; - if (!FS.empty()) { - initializeEnvironment(); + if (!FS.empty()) resetSubtargetFeatures(CPU, FS); - } } void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { @@ -419,50 +417,46 @@ void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { stackAlignment = 16; } -void X86Subtarget::initializeEnvironment() { - PICStyle = PICStyles::None; - X86SSELevel = NoMMXSSE; - X863DNowLevel = NoThreeDNow; - HasCMov = false; - HasX86_64 = false; - HasPOPCNT = false; - HasSSE4A = false; - HasAES = false; - HasPCLMUL = false; - HasFMA = false; - HasFMA4 = false; - HasXOP = false; - HasMOVBE = false; - HasRDRAND = false; - HasF16C = false; - HasFSGSBase = false; - HasLZCNT = false; - HasBMI = false; - HasBMI2 = false; - HasRTM = false; - HasADX = false; - IsBTMemSlow = false; - IsUAMemFast = false; - HasVectorUAMem = false; - HasCmpxchg16b = false; - UseLeaForSP = false; - HasSlowDivide = false; - PostRAScheduler = false; - PadShortFunctions = false; - stackAlignment = 4; - // FIXME: this is a known good value for Yonah. How about others? - MaxInlineSizeThreshold = 128; -} - X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, const std::string &FS, unsigned StackAlignOverride, bool is64Bit) : X86GenSubtargetInfo(TT, CPU, FS) , X86ProcFamily(Others) + , PICStyle(PICStyles::None) + , X86SSELevel(NoMMXSSE) + , X863DNowLevel(NoThreeDNow) + , HasCMov(false) + , HasX86_64(false) + , HasPOPCNT(false) + , HasSSE4A(false) + , HasAES(false) + , HasPCLMUL(false) + , HasFMA(false) + , HasFMA4(false) + , HasXOP(false) + , HasMOVBE(false) + , HasRDRAND(false) + , HasF16C(false) + , HasFSGSBase(false) + , HasLZCNT(false) + , HasBMI(false) + , HasBMI2(false) + , HasRTM(false) + , HasADX(false) + , IsBTMemSlow(false) + , IsUAMemFast(false) + , HasVectorUAMem(false) + , HasCmpxchg16b(false) + , UseLeaForSP(false) + , HasSlowDivide(false) + , PostRAScheduler(false) + , PadShortFunctions(false) + , stackAlignment(4) + // FIXME: this is a known good value for Yonah. How about others? + , MaxInlineSizeThreshold(128) , TargetTriple(TT) , StackAlignOverride(StackAlignOverride) , In64BitMode(is64Bit) { - initializeEnvironment(); resetSubtargetFeatures(CPU, FS); } diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index e97da4b6f4..d1c706725d 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -201,10 +201,8 @@ public: /// \brief Reset the features for the X86 target. virtual void resetSubtargetFeatures(const MachineFunction *MF); -private: - void initializeEnvironment(); void resetSubtargetFeatures(StringRef CPU, StringRef FS); -public: + /// Is this x86_64? 
(disregarding specific ABI / programming model) bool is64Bit() const { return In64BitMode; -- cgit v1.2.3-18-g5258 From b56606274d43c7a3e01b18a08d1115fbf2889996 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Sat, 16 Feb 2013 01:25:28 +0000 Subject: [ms-inline asm] Do not omit the frame pointer if we have ms-inline assembly. If the frame pointer is omitted, and any stack changes occur in the inline assembly, e.g.: "pusha", then any C local variable or C argument references will be incorrect. I pass no judgement on anyone who would do such a thing. ;) rdar://13218191 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175334 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 84b1c106a0..950fd39742 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -50,7 +50,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const { return (MF.getTarget().Options.DisableFramePointerElim(MF) || RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects() || - MFI->isFrameAddressTaken() || + MFI->isFrameAddressTaken() || MF.hasMSInlineAsm() || MF.getInfo()->getForceFramePointer() || MMI.callsUnwindInit() || MMI.callsEHReturn()); } -- cgit v1.2.3-18-g5258 From 901d80065c9afa0ba33e8546c2e1e99a00aceb14 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sat, 16 Feb 2013 01:36:26 +0000 Subject: Reinitialize the ivars in the subtarget so that they can be reset with the new features. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175336 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86Subtarget.cpp | 70 ++++++++++++++++++++++------------------- lib/Target/X86/X86Subtarget.h | 4 ++- 2 files changed, 41 insertions(+), 33 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 58e0d06dfb..0f2c008ab9 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -336,8 +336,10 @@ void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) { !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; std::string FS = !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : ""; - if (!FS.empty()) + if (!FS.empty()) { + initializeEnvironment(); resetSubtargetFeatures(CPU, FS); + } } void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { @@ -417,46 +419,50 @@ void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { stackAlignment = 16; } +void X86Subtarget::initializeEnvironment() { + X86SSELevel = NoMMXSSE; + X863DNowLevel = NoThreeDNow; + HasCMov = false; + HasX86_64 = false; + HasPOPCNT = false; + HasSSE4A = false; + HasAES = false; + HasPCLMUL = false; + HasFMA = false; + HasFMA4 = false; + HasXOP = false; + HasMOVBE = false; + HasRDRAND = false; + HasF16C = false; + HasFSGSBase = false; + HasLZCNT = false; + HasBMI = false; + HasBMI2 = false; + HasRTM = false; + HasADX = false; + IsBTMemSlow = false; + IsUAMemFast = false; + HasVectorUAMem = false; + HasCmpxchg16b = false; + UseLeaForSP = false; + HasSlowDivide = false; + PostRAScheduler = false; + PadShortFunctions = false; + stackAlignment = 4; + // FIXME: this is a known good value for Yonah. How about others? 
+ MaxInlineSizeThreshold = 128; +} + X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, const std::string &FS, unsigned StackAlignOverride, bool is64Bit) : X86GenSubtargetInfo(TT, CPU, FS) , X86ProcFamily(Others) , PICStyle(PICStyles::None) - , X86SSELevel(NoMMXSSE) - , X863DNowLevel(NoThreeDNow) - , HasCMov(false) - , HasX86_64(false) - , HasPOPCNT(false) - , HasSSE4A(false) - , HasAES(false) - , HasPCLMUL(false) - , HasFMA(false) - , HasFMA4(false) - , HasXOP(false) - , HasMOVBE(false) - , HasRDRAND(false) - , HasF16C(false) - , HasFSGSBase(false) - , HasLZCNT(false) - , HasBMI(false) - , HasBMI2(false) - , HasRTM(false) - , HasADX(false) - , IsBTMemSlow(false) - , IsUAMemFast(false) - , HasVectorUAMem(false) - , HasCmpxchg16b(false) - , UseLeaForSP(false) - , HasSlowDivide(false) - , PostRAScheduler(false) - , PadShortFunctions(false) - , stackAlignment(4) - // FIXME: this is a known good value for Yonah. How about others? - , MaxInlineSizeThreshold(128) , TargetTriple(TT) , StackAlignOverride(StackAlignOverride) , In64BitMode(is64Bit) { + initializeEnvironment(); resetSubtargetFeatures(CPU, FS); } diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index d1c706725d..e97da4b6f4 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -201,8 +201,10 @@ public: /// \brief Reset the features for the X86 target. virtual void resetSubtargetFeatures(const MachineFunction *MF); +private: + void initializeEnvironment(); void resetSubtargetFeatures(StringRef CPU, StringRef FS); - +public: /// Is this x86_64? (disregarding specific ABI / programming model) bool is64Bit() const { return In64BitMode; -- cgit v1.2.3-18-g5258 From 30fcfc39e2abdb7765c282e8c9d4c63c3d4f83b1 Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Sat, 16 Feb 2013 13:34:26 +0000 Subject: Minor cleanups. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175359 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 02ac8bfb89..f0e03529d1 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2655,8 +2655,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // This isn't right, although it's probably harmless on x86; liveouts // should be computed from returns not tail calls. Consider a void // function making a tail call to a function returning int. - return DAG.getNode(X86ISD::TC_RETURN, dl, - NodeTys, &Ops[0], Ops.size()); + return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); } Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size()); @@ -4222,10 +4221,11 @@ static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) { /// isZeroNode - Returns true if Elt is a constant zero or a floating point /// constant +0.0. 
bool X86::isZeroNode(SDValue Elt) {
-  return ((isa<ConstantSDNode>(Elt) &&
-           cast<ConstantSDNode>(Elt)->isNullValue()) ||
-          (isa<ConstantFPSDNode>(Elt) &&
-           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Elt))
+    return CN->isNullValue();
+  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Elt))
+    return CFP->getValueAPF().isPosZero();
+  return false;
 }

 /// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
@@ -15675,7 +15675,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
   ConstantSDNode *CmpAgainst = 0;
   if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) &&
       (CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) &&
-      dyn_cast<ConstantSDNode>(Cond.getOperand(0)) == 0) {
+      !isa<ConstantSDNode>(Cond.getOperand(0))) {

     if (CC == X86::COND_NE &&
         CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) {
@@ -15955,8 +15955,7 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
   if (VT == MVT::f32 || VT == MVT::f64) {
     bool ExpectingFlags = false;
     // Check for any users that want flags:
-    for (SDNode::use_iterator UI = N->use_begin(),
-           UE = N->use_end();
+    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
          !ExpectingFlags && UI != UE; ++UI)
       switch (UI->getOpcode()) {
       default:
--
cgit v1.2.3-18-g5258


From 3d6417d170fa832a9b1bf02cd5d9fc69cacd16cb Mon Sep 17 00:00:00 2001
From: NAKAMURA Takumi
Date: Sat, 16 Feb 2013 16:04:29 +0000
Subject: [msvc x64] Update X86CompilationCallback_Win64.asm corresponding to r175267.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175363 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86CompilationCallback_Win64.asm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86CompilationCallback_Win64.asm b/lib/Target/X86/X86CompilationCallback_Win64.asm
index f321778db2..69b4c71651 100644
--- a/lib/Target/X86/X86CompilationCallback_Win64.asm
+++ b/lib/Target/X86/X86CompilationCallback_Win64.asm
@@ -11,7 +11,7 @@
 ;;
 ;;===----------------------------------------------------------------------===

-extrn X86CompilationCallback2: PROC
+extrn LLVMX86CompilationCallback2: PROC

 .code
 X86CompilationCallback proc
@@ -42,7 +42,7 @@ X86CompilationCallback proc
     ; Pass prev frame and return address.
     mov rcx, rbp
     mov rdx, qword ptr [rbp+8]
-    call X86CompilationCallback2
+    call LLVMX86CompilationCallback2

     ; Restore all XMM arg registers.
     movaps xmm3, [rsp+48+32]
--
cgit v1.2.3-18-g5258


From d61932bf844134d886b57e6730a5ae0831ebd115 Mon Sep 17 00:00:00 2001
From: Jakub Staszak
Date: Sun, 17 Feb 2013 18:35:25 +0000
Subject: Return false instead of 0.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175402 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86FastISel.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 7f230ff571..509095c61a 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -726,7 +726,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {

   // Don't handle popping bytes on return for now.
   if (X86MFInfo->getBytesToPopOnReturn() != 0)
-    return 0;
+    return false;

   // fastcc with -tailcallopt is intended to provide a guaranteed
   // tail call optimization. Fastisel doesn't know how to do that.
--
cgit v1.2.3-18-g5258


From 774ec7ba05606925966899d7dbe30453a3a2a877 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Sun, 17 Feb 2013 23:34:14 +0000
Subject: X86: Add a note.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175408 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/README-SSE.txt | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 40110353fc..496b704ee8 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -953,3 +953,12 @@ similarly, v[0]-v[1] should match to hsubpd, and {v[0]-v[1], w[0]-w[1]} should turn into hsubpd also. //===---------------------------------------------------------------------===// + +define <2 x i32> @foo(<2 x double> %in) { + %x = fptosi <2 x double> %in to <2 x i32> + ret <2 x i32> %x +} + +Should compile into cvttpd2dq instead of being scalarized into 2 cvttsd2si. + +//===---------------------------------------------------------------------===// -- cgit v1.2.3-18-g5258 From 98fbe27ac8f0766ea94b89b8c03418131b72bea4 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 18 Feb 2013 20:55:12 +0000 Subject: Support for HiPE-compatible code emission, patch by Yiannis Tsiouris. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175457 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 160 ++++++++++++++++++++++++++++++++++-- lib/Target/X86/X86FrameLowering.h | 2 + 2 files changed, 157 insertions(+), 5 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 950fd39742..eb9f865085 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1387,16 +1387,25 @@ HasNestArgument(const MachineFunction *MF) { } -/// GetScratchRegister - Get a register for performing work in the segmented -/// stack prologue. Depending on platform and the properties of the function -/// either one or two registers will be needed. Set primary to true for -/// the first register, false for the second. +/// GetScratchRegister - Get a temp register for performing work in the +/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform +/// and the properties of the function either one or two registers will be +/// needed. Set primary to true for the first register, false for the second. static unsigned GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) { + CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv(); + + // Erlang stuff. + if (CallingConvention == CallingConv::HiPE) { + if (Is64Bit) + return Primary ? X86::R14 : X86::R13; + else + return Primary ? X86::EBX : X86::EDI; + } + if (Is64Bit) return Primary ? X86::R11 : X86::R12; - CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv(); bool IsNested = HasNestArgument(&MF); if (CallingConvention == CallingConv::X86_FastCall || @@ -1603,3 +1612,144 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { MF.verify(); #endif } + +// Erlang programs may need a special prologue to handle the stack size they +// might need at runtime. That is because Erlang/OTP does not implement a C +// stack but uses a custom implementation of hybrid stack/heap +// architecture. (for more information see Eric Stenman's Ph.D. thesis: +// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf) +// +// +// CheckStack: +// temp0 = sp - MaxStack +// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart +// OldStart: +// ... 
+// IncStack:
+// call inc_stack # doubles the stack space
+// temp0 = sp - MaxStack
+// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
+void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
+  const X86InstrInfo &TII = *TM.getInstrInfo();
+  const X86Subtarget *ST = &MF.getTarget().getSubtarget<X86Subtarget>();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const uint64_t SlotSize = TM.getRegisterInfo()->getSlotSize();
+  const bool Is64Bit = STI.is64Bit();
+  DebugLoc DL;
+  // HiPE-specific values
+  const unsigned HipeLeafWords = 24;
+  const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
+  const unsigned Guaranteed = HipeLeafWords * SlotSize;
+  const unsigned CallerStkArity =
+    std::max(0, MF.getFunction()->arg_size() - CCRegisteredArgs);
+  unsigned MaxStack =
+    MFI->getStackSize() + CallerStkArity * SlotSize + SlotSize;
+
+  assert(ST->isTargetLinux() &&
+         "HiPE prologue is only supported on Linux operating systems.");
+
+  // Compute the largest caller's frame that is needed to fit the callees'
+  // frames. This 'MaxStack' is computed from:
+  //
+  // a) the fixed frame size, which is the space needed for all spilled temps,
+  // b) outgoing on-stack parameter areas, and
+  // c) the minimum stack space this function needs to make available for the
+  //    functions it calls (a tunable ABI property).
+  if (MFI->hasCalls()) {
+    unsigned MoreStackForCalls = 0;
+
+    for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end();
+         MBBI != MBBE; ++MBBI)
+      for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end();
+           MI != ME; ++MI)
+        if (MI->isCall()) {
+          // Get callee operand.
+          const MachineOperand &MO = MI->getOperand(0);
+          const Function *F;
+
+          // Only take account of global function calls (no closures etc.).
+          if (!MO.isGlobal()) continue;
+          if (!(F = dyn_cast<Function>(MO.getGlobal()))) continue;
+
+          // Do not update 'MaxStack' for primitive and built-in functions
+          // (encoded with names either starting with "erlang."/"bif_" or not
+          // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
+          // "_", such as the BIF "suspend_0") as they are executed on another
+          // stack.
+          if ((F->getName().find("erlang.") != std::string::npos) ||
+              (F->getName().find("bif_") != std::string::npos)) continue;
+          if (F->getName().find_first_of("._") == std::string::npos)
+            continue;
+
+          const uint64_t CalleeStkArity =
+            std::max(0, F->arg_size() - CCRegisteredArgs);
+          MoreStackForCalls = std::max(
+            MoreStackForCalls, (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
+        }
+    MaxStack += MoreStackForCalls;
+  }
+
+  // If the stack frame needed is larger than the guaranteed then runtime checks
+  // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
+ if (MaxStack > Guaranteed) { + MachineBasicBlock &prologueMBB = MF.front(); + MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock(); + MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock(); + + for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(), + E = prologueMBB.livein_end(); I != E; I++) { + stackCheckMBB->addLiveIn(*I); + incStackMBB->addLiveIn(*I); + } + + MF.push_front(incStackMBB); + MF.push_front(stackCheckMBB); + + unsigned ScratchReg, SPReg, PReg, SPLimitOffset; + unsigned LEAop, CMPop, CALLop; + if (Is64Bit) { + SPReg = X86::RSP; + PReg = X86::RBP; + LEAop = X86::LEA64r; + CMPop = X86::CMP64rm; + CALLop = X86::CALL64pcrel32; + SPLimitOffset = 0x90; + } else { + SPReg = X86::ESP; + PReg = X86::EBP; + LEAop = X86::LEA32r; + CMPop = X86::CMP32rm; + CALLop = X86::CALLpcrel32; + SPLimitOffset = 0x4c; + } + + ScratchReg = GetScratchRegister(Is64Bit, MF, true); + assert(!MF.getRegInfo().isLiveIn(ScratchReg) && + "HiPE prologue scratch register is live-in"); + + // Create new MBB for StackCheck: + addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), + SPReg, false, -MaxStack); + // SPLimitOffset is in a fixed heap location (pointed by BP). + addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)) + .addReg(ScratchReg), PReg, false, SPLimitOffset); + BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_4)).addMBB(&prologueMBB); + + // Create new MBB for IncStack: + BuildMI(incStackMBB, DL, TII.get(CALLop)). + addExternalSymbol("inc_stack_0"); + addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), + SPReg, false, -MaxStack); + addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)) + .addReg(ScratchReg), PReg, false, SPLimitOffset); + BuildMI(incStackMBB, DL, TII.get(X86::JLE_4)).addMBB(incStackMBB); + + stackCheckMBB->addSuccessor(&prologueMBB, 99); + stackCheckMBB->addSuccessor(incStackMBB, 1); + incStackMBB->addSuccessor(&prologueMBB, 99); + incStackMBB->addSuccessor(incStackMBB, 1); + } +#ifdef XDEBUG + MF.verify(); +#endif +} diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index dc515dc39c..c35d9528b7 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -43,6 +43,8 @@ public: void adjustForSegmentedStacks(MachineFunction &MF) const; + void adjustForHiPEPrologue(MachineFunction &MF) const; + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; -- cgit v1.2.3-18-g5258 From 6228999d826c87ac90344356c2d123361dc1d648 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 18 Feb 2013 21:45:01 +0000 Subject: Fix a 32/64 bit incompatibility in the HiPE prologue generation. 
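A sketch of the failure mode behind a fix of this kind (the types and values here are illustrative assumptions; the commit itself only touches the one arity computation shown below). The argument count is unsigned, so subtracting the register-passed count wraps instead of going negative, and the width at which it wraps depends on the host:

    #include <cstddef>

    size_t calleeOnStackArity(size_t NumArgs, unsigned RegArgs) {
      // Unsigned subtraction wraps rather than going negative, and the
      // wrap width differs between 32-bit and 64-bit size_t, so a
      // clamp-to-zero written around the wrapped value can disagree
      // across hosts. Comparing before subtracting avoids the wrap:
      return NumArgs > RegArgs ? NumArgs - RegArgs : 0;
    }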
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175458 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index eb9f865085..038c39532f 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1682,7 +1682,7 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { continue; const uint64_t CalleeStkArity = - std::max(0, F->arg_size() - CCRegisteredArgs); + std::max(0, F->arg_size() - CCRegisteredArgs); MoreStackForCalls = std::max( MoreStackForCalls, (HipeLeafWords - 1 - CalleeStkArity) * SlotSize); } -- cgit v1.2.3-18-g5258 From 20ea2bc391bf72480998f456494011636dc19fea Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Mon, 18 Feb 2013 22:20:16 +0000 Subject: Remove a useless assert. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175463 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 509095c61a..4e5430d34a 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1377,7 +1377,6 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM, else if (Len >= 2) VT = MVT::i16; else { - assert(Len == 1); VT = MVT::i8; } -- cgit v1.2.3-18-g5258 From 82f7815e6f7819223b5f311195bfc1d9645d4754 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Mon, 18 Feb 2013 23:08:49 +0000 Subject: X86FrameLowering.cpp: Fix a warning in -Asserts. [-Wunused-variable] git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175464 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 038c39532f..a52c4d93e8 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1631,7 +1631,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { // if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { const X86InstrInfo &TII = *TM.getInstrInfo(); - const X86Subtarget *ST = &MF.getTarget().getSubtarget(); MachineFrameInfo *MFI = MF.getFrameInfo(); const uint64_t SlotSize = TM.getRegisterInfo()->getSlotSize(); const bool Is64Bit = STI.is64Bit(); @@ -1645,7 +1644,7 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { unsigned MaxStack = MFI->getStackSize() + CallerStkArity * SlotSize + SlotSize; - assert(ST->isTargetLinux() && + assert(getTarget().getSubtarget()->ST->isTargetLinux() && "HiPE prologue is only supported on Linux operating systems."); // Compute the largest caller's frame that is needed to fit the callees' -- cgit v1.2.3-18-g5258 From 9d7c53af30363038a0acb594201cdb1282510f7b Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Mon, 18 Feb 2013 23:15:21 +0000 Subject: X86FrameLowering.cpp: Fixup. Sorry for the breakage. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175467 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86FrameLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index a52c4d93e8..c98c80db1a 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -1644,7 +1644,7 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
   unsigned MaxStack =
     MFI->getStackSize() + CallerStkArity * SlotSize + SlotSize;

-  assert(getTarget().getSubtarget()->ST->isTargetLinux() &&
+  assert(MF.getTarget().getSubtarget<X86Subtarget>().isTargetLinux() &&
          "HiPE prologue is only supported on Linux operating systems.");

   // Compute the largest caller's frame that is needed to fit the callees'
--
cgit v1.2.3-18-g5258


From 56f58ad0e415fcc390cdd4f891e6bf936f0dcf53 Mon Sep 17 00:00:00 2001
From: Jakub Staszak
Date: Mon, 18 Feb 2013 23:18:22 +0000
Subject: Use array_pod_sort instead of std::sort.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175472 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index f0e03529d1..9ec0502613 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -17657,7 +17657,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
       AsmPieces.clear();
       const std::string &ConstraintsStr = IA->getConstraintString();
       SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
-      std::sort(AsmPieces.begin(), AsmPieces.end());
+      array_pod_sort(AsmPieces.begin(), AsmPieces.end());
       if (AsmPieces.size() == 4 &&
           AsmPieces[0] == "~{cc}" &&
           AsmPieces[1] == "~{dirflag}" &&
@@ -17675,7 +17675,7 @@
       AsmPieces.clear();
       const std::string &ConstraintsStr = IA->getConstraintString();
       SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
-      std::sort(AsmPieces.begin(), AsmPieces.end());
+      array_pod_sort(AsmPieces.begin(), AsmPieces.end());
       if (AsmPieces.size() == 4 &&
           AsmPieces[0] == "~{cc}" &&
           AsmPieces[1] == "~{dirflag}" &&
--
cgit v1.2.3-18-g5258


From ff3139fe53fdc391972bb0ff4d7bde6ced2f5d5a Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Tue, 19 Feb 2013 07:43:59 +0000
Subject: Fix capitalization in comment to match function name.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175497 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9ec0502613..9ed03cd1ee 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -10210,7 +10210,7 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget,
                        MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
 }

-// getTargetVShiftNOde - Handle vector element shifts where the shift amount
+// getTargetVShiftNode - Handle vector element shifts where the shift amount
 // may or may not be a constant. Takes immediate version of shift as input.
static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT, SDValue SrcOp, SDValue ShAmt, -- cgit v1.2.3-18-g5258 From faec9a33968acf994892c65901bc6448f0034ecd Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Tue, 19 Feb 2013 16:38:32 +0000 Subject: Make pass name more precise and fix comment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175525 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86AsmPrinter.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index 61eb14e036..bc7496bad1 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -1,4 +1,4 @@ -//===-- X86AsmPrinter.h - Convert X86 LLVM code to assembly -----*- C++ -*-===// +//===-- X86AsmPrinter.h - X86 implementation of AsmPrinter ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,10 +6,6 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// AT&T assembly code printer class. -// -//===----------------------------------------------------------------------===// #ifndef X86ASMPRINTER_H #define X86ASMPRINTER_H @@ -35,7 +31,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { } virtual const char *getPassName() const LLVM_OVERRIDE { - return "X86 AT&T-Style Assembly Printer"; + return "X86 Assembly / Object Emitter"; } const X86Subtarget &getSubtarget() const { return *Subtarget; } -- cgit v1.2.3-18-g5258 From e5a83d15b247cacbace981c22b793863f5328188 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Tue, 19 Feb 2013 17:14:33 +0000 Subject: Move LLVM_LIBRARY_VISIBILITY for consistency with what was done to PPCJITInfo.cpp in r175394. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175531 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86JITInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index aeb05ccdf8..44d8cce054 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -339,7 +339,7 @@ extern "C" { /// must locate the start of the stub or call site and pass it into the JIT /// compiler function. extern "C" { -void LLVM_LIBRARY_VISIBILITY LLVMX86CompilationCallback2(intptr_t *StackPtr, +LLVM_LIBRARY_VISIBILITY void LLVMX86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { intptr_t *RetAddrLoc = &StackPtr[1]; // We are reading raw stack data here. Tell MemorySanitizer that it is -- cgit v1.2.3-18-g5258 From b1e1d5d4a575f5e5b4ceb7af68f33e75695ee959 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 19 Feb 2013 17:32:57 +0000 Subject: Clean up HiPE prologue emission a bit and avoid signed arithmetic tricks. No intended functionality change. 
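The "signed arithmetic trick" here means clamping an unsigned difference through a signed std::max; a hedged sketch of the before/after shapes (names illustrative, the real change is in the diff below):

    // Before: relies on the unsigned difference converting to a
    // negative signed value so the max can clamp it to zero.
    unsigned Arity1 = std::max<int64_t>(0, NumArgs - RegArgs);
    // After: explicit comparison, no wrap-around and no sign games.
    unsigned Arity2 = NumArgs > RegArgs ? NumArgs - RegArgs : 0;

Both compute the stack-passed arity, but the second form does not depend on integer width or signedness.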
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175536 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 90 +++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 43 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index c98c80db1a..2b3d8538af 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1433,7 +1433,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { bool Is64Bit = STI.is64Bit(); unsigned TlsReg, TlsOffset; DebugLoc DL; - const X86Subtarget *ST = &MF.getTarget().getSubtarget(); unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true); assert(!MF.getRegInfo().isLiveIn(ScratchReg) && @@ -1441,8 +1440,8 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { if (MF.getFunction()->isVarArg()) report_fatal_error("Segmented stacks do not support vararg functions."); - if (!ST->isTargetLinux() && !ST->isTargetDarwin() && - !ST->isTargetWin32() && !ST->isTargetFreeBSD()) + if (!STI.isTargetLinux() && !STI.isTargetDarwin() && + !STI.isTargetWin32() && !STI.isTargetFreeBSD()) report_fatal_error("Segmented stacks not supported on this platform."); MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock(); @@ -1480,13 +1479,13 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { // Read the limit off the current stacklet off the stack_guard location. if (Is64Bit) { - if (ST->isTargetLinux()) { + if (STI.isTargetLinux()) { TlsReg = X86::FS; TlsOffset = 0x70; - } else if (ST->isTargetDarwin()) { + } else if (STI.isTargetDarwin()) { TlsReg = X86::GS; TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90. 
- } else if (ST->isTargetFreeBSD()) { + } else if (STI.isTargetFreeBSD()) { TlsReg = X86::FS; TlsOffset = 0x18; } else { @@ -1502,16 +1501,16 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg) .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg); } else { - if (ST->isTargetLinux()) { + if (STI.isTargetLinux()) { TlsReg = X86::GS; TlsOffset = 0x30; - } else if (ST->isTargetDarwin()) { + } else if (STI.isTargetDarwin()) { TlsReg = X86::GS; TlsOffset = 0x48 + 90*4; - } else if (ST->isTargetWin32()) { + } else if (STI.isTargetWin32()) { TlsReg = X86::FS; TlsOffset = 0x14; // pvArbitrary, reserved for application use - } else if (ST->isTargetFreeBSD()) { + } else if (STI.isTargetFreeBSD()) { report_fatal_error("Segmented stacks not supported on FreeBSD i386."); } else { report_fatal_error("Segmented stacks not supported on this platform."); @@ -1523,10 +1522,10 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP) .addImm(1).addReg(0).addImm(-StackSize).addReg(0); - if (ST->isTargetLinux() || ST->isTargetWin32()) { + if (STI.isTargetLinux() || STI.isTargetWin32()) { BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg) .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); - } else if (ST->isTargetDarwin()) { + } else if (STI.isTargetDarwin()) { // TlsOffset doesn't fit into a mod r/m byte so we need an extra register unsigned ScratchReg2; @@ -1632,19 +1631,18 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { const X86InstrInfo &TII = *TM.getInstrInfo(); MachineFrameInfo *MFI = MF.getFrameInfo(); - const uint64_t SlotSize = TM.getRegisterInfo()->getSlotSize(); + const unsigned SlotSize = TM.getRegisterInfo()->getSlotSize(); const bool Is64Bit = STI.is64Bit(); DebugLoc DL; // HiPE-specific values const unsigned HipeLeafWords = 24; const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5; const unsigned Guaranteed = HipeLeafWords * SlotSize; - const unsigned CallerStkArity = - std::max(0, MF.getFunction()->arg_size() - CCRegisteredArgs); - unsigned MaxStack = - MFI->getStackSize() + CallerStkArity * SlotSize + SlotSize; + unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ? + MF.getFunction()->arg_size() - CCRegisteredArgs : 0; + unsigned MaxStack = MFI->getStackSize() + CallerStkArity*SlotSize + SlotSize; - assert(MF.getTarget().getSubtarget().isTargetLinux() && + assert(STI.isTargetLinux() && "HiPE prologue is only supported on Linux operating systems."); // Compute the largest caller's frame that is needed to fit the callees' @@ -1660,31 +1658,37 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end(); MBBI != MBBE; ++MBBI) for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end(); - MI != ME; ++MI) - if (MI->isCall()) { - // Get callee operand. - const MachineOperand &MO = MI->getOperand(0); - const Function *F; - - // Only take account of global function calls (no closures etc.). 
-          if (!MO.isGlobal()) continue;
-          if (!(F = dyn_cast<Function>(MO.getGlobal()))) continue;
-
-          // Do not update 'MaxStack' for primitive and built-in functions
-          // (encoded with names either starting with "erlang."/"bif_" or not
-          // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
-          // "_", such as the BIF "suspend_0") as they are executed on another
-          // stack.
-          if ((F->getName().find("erlang.") != std::string::npos) ||
-              (F->getName().find("bif_") != std::string::npos)) continue;
-          if (F->getName().find_first_of("._") == std::string::npos)
-            continue;
-
-          const uint64_t CalleeStkArity =
-            std::max(0, F->arg_size() - CCRegisteredArgs);
-          MoreStackForCalls = std::max(
-            MoreStackForCalls, (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
-        }
+         MI != ME; ++MI) {
+      if (!MI->isCall())
+        continue;
+
+      // Get callee operand.
+      const MachineOperand &MO = MI->getOperand(0);
+
+      // Only take account of global function calls (no closures etc.).
+      if (!MO.isGlobal())
+        continue;
+
+      const Function *F = dyn_cast<Function>(MO.getGlobal());
+      if (!F)
+        continue;
+
+      // Do not update 'MaxStack' for primitive and built-in functions
+      // (encoded with names either starting with "erlang."/"bif_" or not
+      // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
+      // "_", such as the BIF "suspend_0") as they are executed on another
+      // stack.
+      if (F->getName().find("erlang.") != StringRef::npos ||
+          F->getName().find("bif_") != StringRef::npos ||
+          F->getName().find_first_of("._") == StringRef::npos)
+        continue;
+
+      unsigned CalleeStkArity =
+        F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
+      if (HipeLeafWords - 1 > CalleeStkArity)
+        MoreStackForCalls = std::max(MoreStackForCalls,
+          (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
+    }
     MaxStack += MoreStackForCalls;
   }
--
cgit v1.2.3-18-g5258


From 608e3554847ef35995b7310ba8acd43ab861e3f0 Mon Sep 17 00:00:00 2001
From: Jakub Staszak
Date: Tue, 19 Feb 2013 21:54:59 +0000
Subject: Add obvious constantness.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175560 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelDAGToDAG.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 6f13186dfb..00fbe6924c 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -280,13 +280,13 @@ namespace {

     /// getTargetMachine - Return a reference to the TargetMachine, casted
     /// to the target-specific type.
-    const X86TargetMachine &getTargetMachine() {
+    const X86TargetMachine &getTargetMachine() const {
       return static_cast<const X86TargetMachine &>(TM);
     }

     /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
     /// to the target-specific type.
-    const X86InstrInfo *getInstrInfo() {
+    const X86InstrInfo *getInstrInfo() const {
       return getTargetMachine().getInstrInfo();
     }
   };
--
cgit v1.2.3-18-g5258


From b86f1e5e557f8a00209eef1c6ecb4532b33d7738 Mon Sep 17 00:00:00 2001
From: Chad Rosier
Date: Tue, 19 Feb 2013 23:50:45 +0000
Subject: [ms-inline asm] Force the use of a base pointer if the MachineFunction includes MS-style inline assembly.

This is a follow-on to r175334. Forcing a FP to be emitted doesn't ensure it
will be used. Therefore, force the base pointer as well. We now treat MS
inline assembly in the same way we treat functions with dynamic stack
realignment and VLAs. This guarantees the BP will be used to reference
parameters and locals.
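To make the failure concrete, here is a hedged illustration of the kind of user code being protected (a hypothetical example, not from the patch):

    void f(int x) {
      __asm {
        pusha            ; moves ESP down by 32 bytes
        ; an ESP-relative reference to 'x' made here would now be
        ; off by 32, since nothing tracks the inline-asm pushes
        popa
      }
    }

With the base pointer reserved, 'x' is addressed relative to the BP register, which the inline-assembly pushes never move, so the reference stays valid.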
rdar://13218191
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175576 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86RegisterInfo.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 401eefb16a..fbe6331e84 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -389,8 +389,10 @@ bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
      return false;

    // When we need stack realignment and there are dynamic allocas, we can't
-   // reference off of the stack pointer, so we reserve a base pointer.
-   if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
+   // reference off of the stack pointer, so we reserve a base pointer. This
+   // is also true if the function contain MS-style inline assembly.
+   if ((needsStackRealignment(MF) && MFI->hasVarSizedObjects()) ||
+       MF.hasMSInlineAsm())
      return true;

    return false;
--
cgit v1.2.3-18-g5258


From 52981c4b6016d9f0e295e0771ec0a50dd073b4b3 Mon Sep 17 00:00:00 2001
From: Elena Demikhovsky
Date: Wed, 20 Feb 2013 12:42:54 +0000
Subject: I optimized the following patterns:

 sext <4 x i1> to <4 x i64>
 sext <4 x i8> to <4 x i64>
 sext <4 x i16> to <4 x i64>

I'm running Combine on SIGN_EXTEND_IN_REG and revert SEXT patterns:
 (sext_in_reg (v4i64 anyext (v4i32 x )), ExtraVT) ->
 (v4i64 sext (v4i32 sext_in_reg (v4i32 x , ExtraVT)))

The sext_in_reg (v4i32 x) may be lowered to shl+sar operations.
The "sar" does not exist on 64-bit operation, so lowering sext_in_reg (v4i64 x) has no vector solution.

I also added a cost of this operations to the AVX costs table.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175619 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp        | 37 +++++++++++++++++++++++++++++++
 lib/Target/X86/X86TargetTransformInfo.cpp |  3 +++
 2 files changed, 40 insertions(+)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9ed03cd1ee..a2271663ee 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1323,6 +1323,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setTargetDAGCombine(ISD::ZERO_EXTEND);
   setTargetDAGCombine(ISD::ANY_EXTEND);
   setTargetDAGCombine(ISD::SIGN_EXTEND);
+  setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
   setTargetDAGCombine(ISD::TRUNCATE);
   setTargetDAGCombine(ISD::SINT_TO_FP);
   setTargetDAGCombine(ISD::SETCC);
@@ -17076,6 +17077,41 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
   return SDValue();
 }

+static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
+                                               const X86Subtarget *Subtarget) {
+  EVT VT = N->getValueType(0);
+  if (!VT.isVector())
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
+  DebugLoc dl = N->getDebugLoc();
+
+  // The SIGN_EXTEND_INREG to v4i64 is expensive operation on the
+  // both SSE and AVX2 since there is no sign-extended shift right
+  // operation on a vector with 64-bit elements.
+  //(sext_in_reg (v4i64 anyext (v4i32 x )), ExtraVT) ->
+  // (v4i64 sext (v4i32 sext_in_reg (v4i32 x , ExtraVT)))
+  if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND ||
+      N0.getOpcode() == ISD::SIGN_EXTEND)) {
+    SDValue N00 = N0.getOperand(0);
+
+    // EXTLOAD has a better solution on AVX2,
+    // it may be replaced with X86ISD::VSEXT node.
+ if (N00.getOpcode() == ISD::LOAD && Subtarget->hasInt256()) + if (!ISD::isNormalLoad(N00.getNode())) + return SDValue(); + + if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) { + SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, + N00, N1); + return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp); + } + } + return SDValue(); +} + static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { @@ -17468,6 +17504,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget); case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget); + case ISD::SIGN_EXTEND_INREG: return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget); case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG,DCI,Subtarget); case ISD::SETCC: return PerformISDSETCCCombine(N, DAG); case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget); diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index f3dfa0e413..fefb479da9 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -232,6 +232,9 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 8 }, { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, }; -- cgit v1.2.3-18-g5258 From 9b5b8b0b9439da2dd4167ece15db7e13d37f461c Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 20 Feb 2013 18:03:44 +0000 Subject: [ms-inline asm] Make the comment a bit more verbose. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175641 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86RegisterInfo.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index fbe6331e84..3b9144dc0b 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -389,8 +389,12 @@ bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const { return false; // When we need stack realignment and there are dynamic allocas, we can't - // reference off of the stack pointer, so we reserve a base pointer. This - // is also true if the function contain MS-style inline assembly. + // reference off of the stack pointer, so we reserve a base pointer. + // + // This is also true if the function contain MS-style inline assembly. We + // do this because if any stack changes occur in the inline assembly, e.g., + // "pusha", then any C local variable or C argument references in the + // inline assembly will be wrong because the SP is not properly tracked. if ((needsStackRealignment(MF) && MFI->hasVarSizedObjects()) || MF.hasMSInlineAsm()) return true; -- cgit v1.2.3-18-g5258 From 3450f800aa65c91f0496816ba6061a422a74c1fe Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Wed, 20 Feb 2013 21:13:59 +0000 Subject: Update TargetLowering ivars for name policy. http://llvm.org/docs/CodingStandards.html#name-types-functions-variables-and-enumerators-properly ivars should be camel-case and start with an upper-case letter. 
A few in TargetLowering were starting with a lower-case letter. No functional change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175667 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a2271663ee..2315ac7f23 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1335,17 +1335,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // On Darwin, -Os means optimize for size without hurting performance, // do not reduce the limit. - maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores - maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8; - maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores - maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4; - maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores - maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores + MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8; + MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores + MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores + MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4; setPrefLoopAlignment(4); // 2^4 bytes. - benefitFromCodePlacementOpt = true; + BenefitFromCodePlacementOpt = true; // Predictable cmov don't hurt on atom because it's in-order. - predictableSelectIsExpensive = !Subtarget->isAtom(); + PredictableSelectIsExpensive = !Subtarget->isAtom(); setPrefFunctionAlignment(4); // 2^4 bytes. } -- cgit v1.2.3-18-g5258 From cb2ae3d98e3bb36e5813f8f69b00d39efd026dcd Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Wed, 20 Feb 2013 22:21:35 +0000 Subject: MCParser: Update method names per coding guidelines. s/AddDirectiveHandler/addDirectiveHandler/ s/ParseMSInlineAsm/parseMSInlineAsm/ s/ParseIdentifier/parseIdentifier/ s/ParseStringToEndOfStatement/parseStringToEndOfStatement/ s/ParseEscapedString/parseEscapedString/ s/EatToEndOfStatement/eatToEndOfStatement/ s/ParseExpression/parseExpression/ s/ParseParenExpression/parseParenExpression/ s/ParseAbsoluteExpression/parseAbsoluteExpression/ s/CheckForValidSection/checkForValidSection/ http://llvm.org/docs/CodingStandards.html#name-types-functions-variables-and-enumerators-properly No functional change intended. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175675 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 8c4c447df9..b2c6d55026 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -906,7 +906,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, if (getLexer().is(AsmToken::Identifier)) { if (ParseRegister(TmpReg, Start, End)) { const MCExpr *Disp; - if (getParser().ParseExpression(Disp, End)) + if (getParser().parseExpression(Disp, End)) return 0; if (getLexer().isNot(AsmToken::RBrac)) @@ -951,7 +951,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SM.onRegister(TmpReg); UpdateLocLex = false; break; - } else if (!getParser().ParseExpression(Disp, End)) { + } else if (!getParser().parseExpression(Disp, End)) { SM.onDispExpr(); UpdateLocLex = false; break; @@ -1033,7 +1033,7 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { } const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); - if (getParser().ParseExpression(Disp, End)) + if (getParser().parseExpression(Disp, End)) return 0; bool NeedSizeDir = false; @@ -1135,7 +1135,7 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) { SMLoc End; const MCExpr *Val; - if (getParser().ParseExpression(Val, End)) + if (getParser().parseExpression(Val, End)) return ErrorOperand(Start, "Unable to parse expression!"); // Don't emit the offset operator. @@ -1169,7 +1169,7 @@ X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) { SMLoc End; const MCExpr *Val; - if (getParser().ParseExpression(Val, End)) + if (getParser().parseExpression(Val, End)) return 0; unsigned Length = 0, Size = 0, Type = 0; @@ -1220,7 +1220,7 @@ X86Operand *X86AsmParser::ParseIntelOperand() { if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) || getLexer().is(AsmToken::Minus)) { const MCExpr *Val; - if (!getParser().ParseExpression(Val, End)) { + if (!getParser().parseExpression(Val, End)) { return X86Operand::CreateImm(Val, Start, End); } } @@ -1271,7 +1271,7 @@ X86Operand *X86AsmParser::ParseATTOperand() { SMLoc Start = Parser.getTok().getLoc(), End; Parser.Lex(); const MCExpr *Val; - if (getParser().ParseExpression(Val, End)) + if (getParser().parseExpression(Val, End)) return 0; return X86Operand::CreateImm(Val, Start, End); } @@ -1289,7 +1289,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); if (getLexer().isNot(AsmToken::LParen)) { SMLoc ExprEnd; - if (getParser().ParseExpression(Disp, ExprEnd)) return 0; + if (getParser().parseExpression(Disp, ExprEnd)) return 0; // After parsing the base expression we could either have a parenthesized // memory address or not. If not, return now. If so, eat the (. @@ -1315,7 +1315,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { SMLoc ExprEnd; // It must be an parenthesized expression, parse it now. 
- if (getParser().ParseParenExpression(Disp, ExprEnd)) + if (getParser().parseParenExpression(Disp, ExprEnd)) return 0; // After parsing the base expression we could either have a parenthesized @@ -1375,7 +1375,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { SMLoc Loc = Parser.getTok().getLoc(); int64_t ScaleVal; - if (getParser().ParseAbsoluteExpression(ScaleVal)){ + if (getParser().parseAbsoluteExpression(ScaleVal)){ Error(Loc, "expected scale expression"); return 0; } @@ -1394,7 +1394,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { SMLoc Loc = Parser.getTok().getLoc(); int64_t Value; - if (getParser().ParseAbsoluteExpression(Value)) + if (getParser().parseAbsoluteExpression(Value)) return 0; if (Value != 1) @@ -1535,7 +1535,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, if (X86Operand *Op = ParseOperand()) Operands.push_back(Op); else { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return true; } @@ -1546,14 +1546,14 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, if (X86Operand *Op = ParseOperand()) Operands.push_back(Op); else { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return true; } } if (getLexer().isNot(AsmToken::EndOfStatement)) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); } } @@ -2243,7 +2243,7 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; - if (getParser().ParseExpression(Value)) + if (getParser().parseExpression(Value)) return true; getParser().getStreamer().EmitValue(Value, Size); -- cgit v1.2.3-18-g5258 From 8389f24a66674e5776399f744fa8def3b217bc9e Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Thu, 21 Feb 2013 16:40:18 +0000 Subject: getX86SubSuperRegister has a special mode with High=true for i64 which exists solely to enable it to call itself for i8 with some registers. The proposed patch simplifies the function somewhat to make the High bit only meaningful for the i8 mode, which makes sense. No functional difference (getX86SubSuperRegister is not getting called from anywhere outside with i64 and High=true). 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175762 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86RegisterInfo.cpp | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 3b9144dc0b..6b0a9154b1 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -620,7 +620,15 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT, case MVT::i8: if (High) { switch (Reg) { - default: return getX86SubSuperRegister(Reg, MVT::i64, High); + default: return getX86SubSuperRegister(Reg, MVT::i64); + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::SI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::DI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::BP; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::SP; case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::AH; case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: @@ -740,22 +748,6 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT, return X86::R15D; } case MVT::i64: - // For 64-bit mode if we've requested a "high" register and the - // Q or r constraints we want one of these high registers or - // just the register name otherwise. - if (High) { - switch (Reg) { - case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: - return X86::SI; - case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: - return X86::DI; - case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: - return X86::BP; - case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: - return X86::SP; - // Fallthrough. - } - } switch (Reg) { default: llvm_unreachable("Unexpected register"); case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: -- cgit v1.2.3-18-g5258 From 700ed80d3da5e98e05ceb90e9bfb66058581a6db Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Thu, 21 Feb 2013 20:05:00 +0000 Subject: Move the eliminateCallFramePseudoInstr method from TargetRegisterInfo to TargetFrameLowering, where it belongs. Incidentally, this allows us to delete some duplicated (and slightly different!) code in TRI. There are potentially other layering problems that can be cleaned up as a result, or in a similar manner. The refactoring was OK'd by Anton Korobeynikov on llvmdev. Note: this touches the target interfaces, so out-of-tree targets may be affected. 
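For an out-of-tree target, the visible consequence is that the override moves from the TargetRegisterInfo subclass to the TargetFrameLowering subclass with an unchanged signature (sketch; the 'Foo' names are placeholders):

    class FooFrameLowering : public TargetFrameLowering {
    public:
      // Previously declared in FooRegisterInfo (TargetRegisterInfo
      // subclass); same signature, new home:
      void eliminateCallFramePseudoInstr(MachineFunction &MF,
                                         MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator I) const;
    };

The X86 version of exactly this move is in the diff below.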
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175788 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 85 +++++++++++++++++++++++++++++- lib/Target/X86/X86FrameLowering.h | 4 ++ lib/Target/X86/X86RegisterInfo.cpp | 101 ------------------------------------ lib/Target/X86/X86RegisterInfo.h | 4 -- 4 files changed, 87 insertions(+), 107 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 2b3d8538af..a05cf5c346 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -55,8 +55,8 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const { MMI.callsUnwindInit() || MMI.callsEHReturn()); } -static unsigned getSUBriOpcode(unsigned isLP64, int64_t Imm) { - if (isLP64) { +static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) { + if (IsLP64) { if (isInt<8>(Imm)) return X86::SUB64ri8; return X86::SUB64ri32; @@ -1756,3 +1756,84 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { MF.verify(); #endif } + +void X86FrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const X86InstrInfo &TII = *TM.getInstrInfo(); + const X86RegisterInfo &RegInfo = *TM.getRegisterInfo(); + unsigned StackPtr = RegInfo.getStackRegister(); + bool reseveCallFrame = hasReservedCallFrame(MF); + int Opcode = I->getOpcode(); + bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); + bool IsLP64 = STI.isTarget64BitLP64(); + DebugLoc DL = I->getDebugLoc(); + uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0; + uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0; + I = MBB.erase(I); + + if (!reseveCallFrame) { + // If the stack pointer can be changed after prologue, turn the + // adjcallstackup instruction into a 'sub ESP, ' and the + // adjcallstackdown instruction into 'add ESP, ' + // TODO: consider using push / pop instead of sub + store / add + if (Amount == 0) + return; + + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); + Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; + + MachineInstr *New = 0; + if (Opcode == TII.getCallFrameSetupOpcode()) { + New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), + StackPtr) + .addReg(StackPtr) + .addImm(Amount); + } else { + assert(Opcode == TII.getCallFrameDestroyOpcode()); + + // Factor out the amount the callee already popped. + Amount -= CalleeAmt; + + if (Amount) { + unsigned Opc = getADDriOpcode(IsLP64, Amount); + New = BuildMI(MF, DL, TII.get(Opc), StackPtr) + .addReg(StackPtr).addImm(Amount); + } + } + + if (New) { + // The EFLAGS implicit def is dead. + New->getOperand(3).setIsDead(); + + // Replace the pseudo instruction with a new instruction. + MBB.insert(I, New); + } + + return; + } + + if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) { + // If we are performing frame pointer elimination and if the callee pops + // something off the stack pointer, add it back. We do this until we have + // more advanced stack pointer tracking ability. + unsigned Opc = getSUBriOpcode(IsLP64, CalleeAmt); + MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr) + .addReg(StackPtr).addImm(CalleeAmt); + + // The EFLAGS implicit def is dead. 
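+  // (Operand 3 is the implicit EFLAGS def appended by BuildMI from the
+  // SUB/ADD instruction description; nothing reads the flags afterwards.)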
+ New->getOperand(3).setIsDead(); + + // We are not tracking the stack pointer adjustment by the callee, so make + // sure we restore the stack pointer immediately after the call, there may + // be spill code inserted between the CALL and ADJCALLSTACKUP instructions. + MachineBasicBlock::iterator B = MBB.begin(); + while (I != B && !llvm::prior(I)->isCall()) + --I; + MBB.insert(I, New); + } +} + diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index c35d9528b7..3f08b9a2e8 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -65,6 +65,10 @@ public: int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const; uint32_t getCompactUnwindEncoding(MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 6b0a9154b1..03f412fd91 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -447,107 +447,6 @@ bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, return false; } -static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) { - if (is64Bit) { - if (isInt<8>(Imm)) - return X86::SUB64ri8; - return X86::SUB64ri32; - } else { - if (isInt<8>(Imm)) - return X86::SUB32ri8; - return X86::SUB32ri; - } -} - -static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) { - if (is64Bit) { - if (isInt<8>(Imm)) - return X86::ADD64ri8; - return X86::ADD64ri32; - } else { - if (isInt<8>(Imm)) - return X86::ADD32ri8; - return X86::ADD32ri; - } -} - -void X86RegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - bool reseveCallFrame = TFI->hasReservedCallFrame(MF); - int Opcode = I->getOpcode(); - bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); - DebugLoc DL = I->getDebugLoc(); - uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0; - uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0; - I = MBB.erase(I); - - if (!reseveCallFrame) { - // If the stack pointer can be changed after prologue, turn the - // adjcallstackup instruction into a 'sub ESP, ' and the - // adjcallstackdown instruction into 'add ESP, ' - // TODO: consider using push / pop instead of sub + store / add - if (Amount == 0) - return; - - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); - Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; - - MachineInstr *New = 0; - if (Opcode == TII.getCallFrameSetupOpcode()) { - New = BuildMI(MF, DL, TII.get(getSUBriOpcode(Is64Bit, Amount)), - StackPtr) - .addReg(StackPtr) - .addImm(Amount); - } else { - assert(Opcode == TII.getCallFrameDestroyOpcode()); - - // Factor out the amount the callee already popped. - Amount -= CalleeAmt; - - if (Amount) { - unsigned Opc = getADDriOpcode(Is64Bit, Amount); - New = BuildMI(MF, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr).addImm(Amount); - } - } - - if (New) { - // The EFLAGS implicit def is dead. - New->getOperand(3).setIsDead(); - - // Replace the pseudo instruction with a new instruction. 
- MBB.insert(I, New); - } - - return; - } - - if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) { - // If we are performing frame pointer elimination and if the callee pops - // something off the stack pointer, add it back. We do this until we have - // more advanced stack pointer tracking ability. - unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt); - MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr).addImm(CalleeAmt); - - // The EFLAGS implicit def is dead. - New->getOperand(3).setIsDead(); - - // We are not tracking the stack pointer adjustment by the callee, so make - // sure we restore the stack pointer immediately after the call, there may - // be spill code inserted between the CALL and ADJCALLSTACKUP instructions. - MachineBasicBlock::iterator B = MBB.begin(); - while (I != B && !llvm::prior(I)->isCall()) - --I; - MBB.insert(I, New); - } -} - void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 5b45e9e204..b9d7b8cf8b 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -117,10 +117,6 @@ public: bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const; - void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; -- cgit v1.2.3-18-g5258 From c4952bfc31ee437590eeba8f16800fda5e4d607e Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 22 Feb 2013 19:19:44 +0000 Subject: x86_64: designate most general purpose and SSE registers as callee save under coldcc git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175911 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86CallingConv.td | 3 +++ lib/Target/X86/X86RegisterInfo.cpp | 46 +++++++++++++++++++++----------------- 2 files changed, 28 insertions(+), 21 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 7ad2fdd259..b516be0696 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -519,6 +519,9 @@ def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>; def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15, (sequence "XMM%u", 6, 15))>; +def CSR_MostRegs_64 : CalleeSavedRegs<(add RBX, RCX, RDX, RSI, RDI, R8, R9, R10, + R11, R12, R13, R14, R15, RBP, + (sequence "XMM%u", 0, 15))>; // Standard C + YMM6-15 def CSR_Win64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 03f412fd91..16886e432d 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -235,38 +235,40 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, const uint16_t * X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - bool callsEHReturn = false; - bool ghcCall = false; - bool oclBiCall = false; - bool hipeCall = false; - bool HasAVX = TM.getSubtarget().hasAVX(); - - if (MF) { - callsEHReturn = MF->getMMI().callsEHReturn(); - const Function *F = MF->getFunction(); - ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false); - oclBiCall = (F ? 
F->getCallingConv() == CallingConv::Intel_OCL_BI : false); - hipeCall = (F ? F->getCallingConv() == CallingConv::HiPE : false); - } - - if (ghcCall || hipeCall) + switch (MF->getFunction()->getCallingConv()) { + case CallingConv::GHC: + case CallingConv::HiPE: return CSR_NoRegs_SaveList; - if (oclBiCall) { + + case CallingConv::Intel_OCL_BI: { + bool HasAVX = TM.getSubtarget().hasAVX(); if (HasAVX && IsWin64) - return CSR_Win64_Intel_OCL_BI_AVX_SaveList; + return CSR_Win64_Intel_OCL_BI_AVX_SaveList; if (HasAVX && Is64Bit) - return CSR_64_Intel_OCL_BI_AVX_SaveList; + return CSR_64_Intel_OCL_BI_AVX_SaveList; if (!HasAVX && !IsWin64 && Is64Bit) - return CSR_64_Intel_OCL_BI_SaveList; + return CSR_64_Intel_OCL_BI_SaveList; + break; } + + case CallingConv::Cold: + if (Is64Bit) + return CSR_MostRegs_64_SaveList; + break; + + default: + break; + } + + bool CallsEHReturn = MF->getMMI().callsEHReturn(); if (Is64Bit) { if (IsWin64) return CSR_Win64_SaveList; - if (callsEHReturn) + if (CallsEHReturn) return CSR_64EHRet_SaveList; return CSR_64_SaveList; } - if (callsEHReturn) + if (CallsEHReturn) return CSR_32EHRet_SaveList; return CSR_32_SaveList; } @@ -287,6 +289,8 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { return CSR_NoRegs_RegMask; if (!Is64Bit) return CSR_32_RegMask; + if (CC == CallingConv::Cold) + return CSR_MostRegs_64_RegMask; if (IsWin64) return CSR_Win64_RegMask; return CSR_64_RegMask; -- cgit v1.2.3-18-g5258 From 36bd16ef0bd808fb4191bfb6e1b4609aef0dc6bd Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 23 Feb 2013 10:40:58 +0000 Subject: X86: Disable cmov-memory patterns on subtargets without cmov. Fixes PR15115. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175962 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrCompiler.td | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index f3879628c5..734e5982b2 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -1081,12 +1081,14 @@ def : Pat<(X86cmp GR64:$src1, 0), // inverted. multiclass CMOVmr { - def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS), - (Inst16 GR16:$src2, addr:$src1)>; - def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS), - (Inst32 GR32:$src2, addr:$src1)>; - def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS), - (Inst64 GR64:$src2, addr:$src1)>; + let Predicates = [HasCMov] in { + def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS), + (Inst16 GR16:$src2, addr:$src1)>; + def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS), + (Inst32 GR32:$src2, addr:$src1)>; + def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS), + (Inst64 GR64:$src2, addr:$src1)>; + } } defm : CMOVmr; -- cgit v1.2.3-18-g5258 From f8db4478f80ef63cf51518affc7b97c9d310bc23 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sun, 24 Feb 2013 07:09:35 +0000 Subject: Revert r169638 because it broke Mesa llvmpipe tests. Fix PR15239. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175985 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2315ac7f23..1543e23a46 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16248,11 +16248,6 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, DebugLoc DL = N->getDebugLoc(); - // We are going to replace the AND, OR, NAND with either BLEND - // or PSIGN, which only look at the MSB. The VSRAI instruction - // does not affect the highest bit, so we can get rid of it. - Mask = Mask.getOperand(0); - // Now we know we at least have a plendvb with the mask val. See if // we can form a psignb/w/d. // psign = x.type == y.type == mask.type && y = sub(0, x); @@ -16261,7 +16256,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, X.getValueType() == MaskVT && Y.getValueType() == MaskVT) { assert((EltBits == 8 || EltBits == 16 || EltBits == 32) && "Unsupported VT for PSIGN"); - Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask); + Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0)); return DAG.getNode(ISD::BITCAST, DL, VT, Mask); } // PBLENDVB only available on SSE 4.1 -- cgit v1.2.3-18-g5258 From dca83187b7c4465ad6ff8507052223d31c0ea66a Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Mon, 25 Feb 2013 19:06:27 +0000 Subject: [ms-inline asm] Add support for the pushad/popad mnemonics. rdar://13254235 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176036 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.td | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 84c278c7de..d989ec7bb0 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -870,16 +870,14 @@ let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>, Requires<[In64BitMode]>; - - let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP], mayLoad=1, neverHasSideEffects=1 in { -def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", [], IIC_POP_A>, +def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l|d}", [], IIC_POP_A>, Requires<[In32BitMode]>; } let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], mayStore=1, neverHasSideEffects=1 in { -def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", [], IIC_PUSH_A>, +def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l|d}", [], IIC_PUSH_A>, Requires<[In32BitMode]>; } -- cgit v1.2.3-18-g5258 From fd3417d288c22673ec6d76dc4695989bb544373f Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Mon, 25 Feb 2013 21:59:35 +0000 Subject: [fast-isel] Add X86FastIsel::FastLowerArguments to handle functions with 6 or fewer scalar integer (i32 or i64) arguments. It completely eliminates the need for SDISel for trivial functions. Also, add the new llc -fast-isel-abort-args option, which is similar to -fast-isel-abort option, but for formal argument lowering. 
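As a sketch (the function and invocation below are hypothetical; the constraints are the ones the new code checks): a function whose formal arguments fast-isel can now lower by itself.

// At most six scalar i32/i64 arguments, C calling convention, 64-bit target,
// and no byval/inreg/sret/nest attributes -- so no SelectionDAG fallback.
long addmul(long a, long b, long c, int d) {
  return a * b + c + d;
}

// Hypothetical check that argument lowering really stays in fast-isel:
//   llc -O0 -fast-isel -fast-isel-abort -fast-isel-abort-args addmul.ll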
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176052 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 73 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 4e5430d34a..ed17b113a6 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -75,6 +75,8 @@ public: virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, const LoadInst *LI); + virtual bool FastLowerArguments(); + #include "X86GenFastISel.inc" private: @@ -1520,6 +1522,77 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { } } +bool X86FastISel::FastLowerArguments() { + if (!FuncInfo.CanLowerReturn) + return false; + + const Function *F = FuncInfo.Fn; + if (F->isVarArg()) + return false; + + CallingConv::ID CC = F->getCallingConv(); + if (CC != CallingConv::C) + return false; + + if (!Subtarget->is64Bit()) + return false; + + // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments. + unsigned Idx = 1; + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++Idx) { + if (Idx > 6) + return false; + + if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) || + F->getAttributes().hasAttribute(Idx, Attribute::InReg) || + F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || + F->getAttributes().hasAttribute(Idx, Attribute::Nest)) + return false; + + Type *ArgTy = I->getType(); + if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) + return false; + + EVT ArgVT = TLI.getValueType(ArgTy); + switch (ArgVT.getSimpleVT().SimpleTy) { + case MVT::i32: + case MVT::i64: + break; + default: + return false; + } + } + + static const uint16_t GPR32ArgRegs[] = { + X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D + }; + static const uint16_t GPR64ArgRegs[] = { + X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9 + }; + + Idx = 0; + const TargetRegisterClass *RC32 = TLI.getRegClassFor(MVT::i32); + const TargetRegisterClass *RC64 = TLI.getRegClassFor(MVT::i64); + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++Idx) { + if (I->use_empty()) + continue; + bool is32Bit = TLI.getValueType(I->getType()) == MVT::i32; + const TargetRegisterClass *RC = is32Bit ? RC32 : RC64; + unsigned SrcReg = is32Bit ? GPR32ArgRegs[Idx] : GPR64ArgRegs[Idx]; + unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); + // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. + // Without this, EmitLiveInCopies may eliminate the livein if its only + // use is a bitcast (which isn't turned into an instruction). 
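+    // (DstReg is the vreg that addLiveIn tied to the incoming physreg;
+    // copying it into ResultReg below gives that livein copy a real use.)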
+    unsigned ResultReg = createResultReg(RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+            ResultReg).addReg(DstReg, getKillRegState(true));
+    UpdateValueMap(I, ResultReg);
+  }
+  return true;
+}
+
 bool X86FastISel::X86SelectCall(const Instruction *I) {
   const CallInst *CI = cast<CallInst>(I);
   const Value *Callee = CI->getCalledValue();
-- 
cgit v1.2.3-18-g5258


From 5e6e15caa79c30e155665e58b81540ec15e52051 Mon Sep 17 00:00:00 2001
From: Michael Liao
Date: Mon, 25 Feb 2013 23:01:03 +0000
Subject: Fix PR10499

- Check whether SSE is available before lowering all-1s vector building with
  PCMPEQD, which is only available from SSE2.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176058 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1543e23a46..57014dd488 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5344,7 +5344,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   // Vectors containing all ones can be matched by pcmpeqd on 128-bit width
   // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
   // vpcmpeqd on 256-bit vectors.
-  if (ISD::isBuildVectorAllOnes(Op.getNode())) {
+  if (ISD::isBuildVectorAllOnes(Op.getNode()) && Subtarget->hasSSE2()) {
     if (VT == MVT::v4i32 ||
         (VT == MVT::v8i32 && Subtarget->hasInt256()))
       return Op;
-- 
cgit v1.2.3-18-g5258


From d09318f9013aad79d7872de2ba490734b597f4d4 Mon Sep 17 00:00:00 2001
From: Michael Liao
Date: Mon, 25 Feb 2013 23:16:36 +0000
Subject: Refine fix to PR10499, no functionality change

- Put the expensive check after the simple one.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176060 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 57014dd488..fb3352059a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5344,7 +5344,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   // Vectors containing all ones can be matched by pcmpeqd on 128-bit width
   // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
   // vpcmpeqd on 256-bit vectors.
-  if (ISD::isBuildVectorAllOnes(Op.getNode()) && Subtarget->hasSSE2()) {
+  if (Subtarget->hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) {
     if (VT == MVT::v4i32 ||
         (VT == MVT::v8i32 && Subtarget->hasInt256()))
       return Op;
-- 
cgit v1.2.3-18-g5258


From fe88aa0d148510e41bc3080dea4febcb1445855c Mon Sep 17 00:00:00 2001
From: Chad Rosier
Date: Tue, 26 Feb 2013 01:05:31 +0000
Subject: [fast-isel] Make sure the FastLowerArguments function checks that
 the argument type is a simple type.
rdar://13290455 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176066 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index ed17b113a6..b5c3270065 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1555,6 +1555,7 @@ bool X86FastISel::FastLowerArguments() { return false; EVT ArgVT = TLI.getValueType(ArgTy); + if (!ArgVT.isSimple()) return false; switch (ArgVT.getSimpleVT().SimpleTy) { case MVT::i32: case MVT::i64: -- cgit v1.2.3-18-g5258 From e7c52282dd1b0f2d53c72c0036e28badb43221b6 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 27 Feb 2013 05:23:56 +0000 Subject: std::string to StringRef. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176166 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86Subtarget.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 0f2c008ab9..ec8733fba6 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -332,9 +332,9 @@ void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) { "target-cpu"); Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features"); - std::string CPU = - !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; - std::string FS = + StringRef CPU = + !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString() : ""; + StringRef FS = !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : ""; if (!FS.empty()) { initializeEnvironment(); @@ -343,7 +343,7 @@ void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) { } void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { - std::string CPUName = CPU; + StringRef CPUName = CPU; if (!FS.empty() || !CPU.empty()) { if (CPUName.empty()) { #if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\ -- cgit v1.2.3-18-g5258 From f8b80de1a952f14921c3137545e05575ea88e4ab Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 27 Feb 2013 05:56:20 +0000 Subject: Revert r176166 because it broke one of the lit tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176171 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86Subtarget.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index ec8733fba6..0f2c008ab9 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -332,9 +332,9 @@ void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) { "target-cpu"); Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features"); - StringRef CPU = - !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString() : ""; - StringRef FS = + std::string CPU = + !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; + std::string FS = !FSAttr.hasAttribute(Attribute::None) ? 
FSAttr.getValueAsString() : ""; if (!FS.empty()) { initializeEnvironment(); @@ -343,7 +343,7 @@ void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) { } void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { - StringRef CPUName = CPU; + std::string CPUName = CPU; if (!FS.empty() || !CPU.empty()) { if (CPUName.empty()) { #if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\ -- cgit v1.2.3-18-g5258 From 2d1035def15030ae404b227747d0a28454ea32a9 Mon Sep 17 00:00:00 2001 From: Yiannis Tsiouris Date: Thu, 28 Feb 2013 16:59:10 +0000 Subject: Re-format comments (and check commit access) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176270 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index a05cf5c346..54cbd40274 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1386,7 +1386,6 @@ HasNestArgument(const MachineFunction *MF) { return false; } - /// GetScratchRegister - Get a temp register for performing work in the /// segmented stack and the Erlang/HiPE stack prologue. Depending on platform /// and the properties of the function either one or two registers will be @@ -1612,22 +1611,21 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { #endif } -// Erlang programs may need a special prologue to handle the stack size they -// might need at runtime. That is because Erlang/OTP does not implement a C -// stack but uses a custom implementation of hybrid stack/heap -// architecture. (for more information see Eric Stenman's Ph.D. thesis: -// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf) -// -// -// CheckStack: -// temp0 = sp - MaxStack -// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart -// OldStart: -// ... -// IncStack: -// call inc_stack # doubles the stack space -// temp0 = sp - MaxStack -// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart +/// Erlang programs may need a special prologue to handle the stack size they +/// might need at runtime. That is because Erlang/OTP does not implement a C +/// stack but uses a custom implementation of hybrid stack/heap architecture. +/// (for more information see Eric Stenman's Ph.D. thesis: +/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf) +/// +/// CheckStack: +/// temp0 = sp - MaxStack +/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart +/// OldStart: +/// ... +/// IncStack: +/// call inc_stack # doubles the stack space +/// temp0 = sp - MaxStack +/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { const X86InstrInfo &TII = *TM.getInstrInfo(); MachineFrameInfo *MFI = MF.getFrameInfo(); -- cgit v1.2.3-18-g5258 From 4d9b7c234fd2510c27e6d74a3f0653efc0141580 Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Fri, 1 Mar 2013 09:46:03 +0000 Subject: GCC thinks that this variable might be used uninitialized (it isn't). 
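A minimal sketch of the false positive (names invented for illustration): the only write and the only read are guarded by the same condition, which the warning cannot always prove.

unsigned demo(bool HasMatch) {
  unsigned MissingFeature = 0; // init only to quiet -Wmaybe-uninitialized
  if (HasMatch)
    MissingFeature = 1;                 // the only write...
  return HasMatch ? MissingFeature : 0; // ...guards the only read
}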
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176341 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/AsmParser/X86AsmParser.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index b2c6d55026..ee5c2b2bfd 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2080,7 +2080,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   // Check for the various suffix matches.
   Tmp[Base.size()] = Suffixes[0];
   unsigned ErrorInfoIgnore;
-  unsigned ErrorInfoMissingFeature;
+  unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
   unsigned Match1, Match2, Match3, Match4;

   Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
-- 
cgit v1.2.3-18-g5258


From a6b20ced765b67a85d9219d0c8547fc9c133e14f Mon Sep 17 00:00:00 2001
From: Michael Liao
Date: Fri, 1 Mar 2013 18:40:30 +0000
Subject: Fix PR10475

- ISD::SHL/SRL/SRA must have either both scalar or both vector operands, but
  TLI.getShiftAmountTy() so far only returns a scalar type. As a result,
  backend logic that assumes this breaks.
- Rename the original TLI.getShiftAmountTy() to TLI.getScalarShiftAmountTy()
  and re-define TLI.getShiftAmountTy() to return a target-specified scalar
  type or the same vector type as the 1st operand.
- Fix most TICG logic that assumed TLI.getShiftAmountTy() returns a simple
  scalar type.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176364 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 2 +-
 lib/Target/X86/X86ISelLowering.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index fb3352059a..451acca5f3 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4956,7 +4956,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
   return DAG.getNode(ISD::BITCAST, dl, VT,
                      DAG.getNode(Opc, dl, ShVT, SrcOp,
                              DAG.getConstant(NumBits,
-                                  TLI.getShiftAmountTy(SrcOp.getValueType()))));
+                                  TLI.getScalarShiftAmountTy(SrcOp.getValueType()))));
 }

 SDValue
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 958ceb0f89..da1dad0f40 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -471,7 +471,7 @@ namespace llvm {

     virtual unsigned getJumpTableEncoding() const;

-    virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
+    virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i8; }

     virtual const MCExpr *
     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
-- 
cgit v1.2.3-18-g5258


From 5f0d9dbdf48a9efe16bfadf88e5335f7b9a8ec3f Mon Sep 17 00:00:00 2001
From: Arnold Schwaighofer
Date: Sat, 2 Mar 2013 04:02:52 +0000
Subject: X86 cost model: Adjust cost for custom lowered vector multiplies

This matters for example in following matrix multiply:

int **mmult(int rows, int cols, int **m1, int **m2, int **m3) {
  int i, j, k, val;
  for (i=0; i<rows; i++) {
    for (j=0; j<cols; j++) {
      val = 0;
      for (k=0; k<cols; k++)
        val += m1[i][k] * m2[k][j];
      m3[i][j] = val;
    }
  }
  return(m3);
}

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
-  if (ST->hasAVX()) {
-    int Idx = CostTableLookup(AVX1CostTable, array_lengthof(AVX1CostTable), ISD,
-                              LT.second);
+  if (ST->hasAVX() && !ST->hasAVX2()) {
+    int Idx = CostTableLookup(AVX1CostTable, array_lengthof(AVX1CostTable),
+                              ISD, LT.second);
     if (Idx != -1)
       return LT.first * AVX1CostTable[Idx].Cost;
   }
+
+  // Custom lowering of vectors.
+ static const CostTblEntry CustomLowered[] = { + // A v2i64/v4i64 and multiply is custom lowered as a series of long + // multiplies(3), shifts(4) and adds(2). + { ISD::MUL, MVT::v2i64, 9 }, + { ISD::MUL, MVT::v4i64, 9 }, + }; + int Idx = CostTableLookup(CustomLowered, array_lengthof(CustomLowered), + ISD, LT.second); + if (Idx != -1) + return LT.first * CustomLowered[Idx].Cost; + + // Special lowering of v4i32 mul on sse2, sse3: Lower v4i32 mul as 2x shuffle, + // 2x pmuludq, 2x shuffle. + if (ISD == ISD::MUL && LT.second == MVT::v4i32 && ST->hasSSE2() && + !ST->hasSSE41()) + return 6; + // Fallback to the default implementation. return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty); } -- cgit v1.2.3-18-g5258 From 9a2cfffdb6340c54ff553c1b81364d0f17fa8f45 Mon Sep 17 00:00:00 2001 From: Preston Gurd Date: Mon, 4 Mar 2013 18:13:57 +0000 Subject: Bypass Slow Divides * Only apply divide bypass optimization when not optimizing for size. * Fixed bug caused by constant for 0 value of type Int32, used dividend type to generate the constant instead. * For atom x86-64 apply the divide bypass to use 16-bit divides instead of 64-bit divides when operand values are small enough. * Added lit tests for 64-bit divide bypass. Patch by Tyler Nowicki! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176442 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 451acca5f3..5d12b0a392 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -181,9 +181,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister()); - // Bypass i32 with i8 on Atom when compiling with O2 - if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) + // Bypass expensive divides on Atom when compiling with O2 + if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) { addBypassSlowDiv(32, 8); + if (Subtarget->is64Bit()) + addBypassSlowDiv(64, 16); + } if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) { // Setup Windows compiler runtime calls. -- cgit v1.2.3-18-g5258 From 6c4265a541c9e431961113c1a5d92fb4628bfe13 Mon Sep 17 00:00:00 2001 From: David Sehr Date: Tue, 5 Mar 2013 00:02:23 +0000 Subject: The current X86 NOP padding uses one long NOP followed by the remainder in one-byte NOPs. If the processor actually executes those NOPs, as it sometimes does with aligned bundling, this can have a performance impact. From my micro-benchmarks run on my one machine, a 15-byte NOP followed by twelve one-byte NOPs is about 20% worse than a 15 followed by a 12. This patch changes NOP emission to emit as many 15-byte (the maximum) as possible followed by at most one shorter NOP. 
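A worked sketch of the new strategy (illustrative; it mirrors the loop in the diff that follows): padding Count = 27 now emits one 15-byte NOP followed by a single 12-byte NOP, rather than one 15-byte NOP and twelve 0x90 one-byte NOPs.

#include <algorithm>
#include <cstdint>

void emitNopPadding(uint64_t Count) { // assumes Count > 0
  do {
    const uint8_t ThisNopLength = (uint8_t)std::min(Count, (uint64_t)15);
    // ... write one NOP instruction of ThisNopLength bytes here ...
    Count -= ThisNopLength; // for Count = 27: emits lengths 15, then 12
  } while (Count != 0);
}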
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176464 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index acc90eceba..598ddee56d 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -315,18 +315,18 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { return true; } - // Write an optimal sequence for the first 15 bytes. - const uint64_t OptimalCount = (Count < 16) ? Count : 15; - const uint64_t Prefixes = OptimalCount <= 10 ? 0 : OptimalCount - 10; - for (uint64_t i = 0, e = Prefixes; i != e; i++) - OW->Write8(0x66); - const uint64_t Rest = OptimalCount - Prefixes; - for (uint64_t i = 0, e = Rest; i != e; i++) - OW->Write8(Nops[Rest - 1][i]); - - // Finish with single byte nops. - for (uint64_t i = OptimalCount, e = Count; i != e; ++i) - OW->Write8(0x90); + // 15 is the longest single nop instruction. Emit as many 15-byte nops as + // needed, then emit a nop of the remaining length. + do { + const uint8_t ThisNopLength = (uint8_t) std::min(Count, (uint64_t) 15); + const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10; + for (uint8_t i = 0; i < Prefixes; i++) + OW->Write8(0x66); + const uint8_t Rest = ThisNopLength - Prefixes; + for (uint8_t i = 0; i < Rest; i++) + OW->Write8(Nops[Rest - 1][i]); + Count -= ThisNopLength; + } while (Count != 0); return true; } -- cgit v1.2.3-18-g5258 From c537f79dcd9d91577b8e0a41c11f68b628d73af4 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Wed, 6 Mar 2013 00:17:04 +0000 Subject: Fix PR15355 - Clear 'mayStore' flag when loading from the atomic variable before the spin loop - Clear kill flag from one use to multiple use in registers forming the address to that atomic variable - don't use a physical register as live-in register in BB (neither entry nor landing pad.) by copying it into virtual register (patch by Cameron Zwarich) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176538 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 284 ++++++++++++++++++++++++------------- 1 file changed, 182 insertions(+), 102 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5d12b0a392..0952350d07 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12893,13 +12893,16 @@ static unsigned getPseudoCMOVOpc(EVT VT) { // to // // ... -// EAX = LOAD MI.addr +// t1 = LOAD MI.addr // loop: -// t1 = OP MI.val, EAX -// LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined] +// t4 = phi(t1, t3 / loop) +// t2 = OP MI.val, t4 +// EAX = t4 +// LCMPXCHG [MI.addr], t2, [EAX is implicitly used & defined] +// t3 = EAX // JNE loop // sink: -// dst = EAX +// dst = t3 // ... 
MachineBasicBlock * X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, @@ -12936,7 +12939,11 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, const TargetRegisterClass *RC = MRI.getRegClass(DstReg); MVT::SimpleValueType VT = *RC->vt_begin(); - unsigned AccPhyReg = getX86SubSuperRegister(X86::EAX, VT); + unsigned t1 = MRI.createVirtualRegister(RC); + unsigned t2 = MRI.createVirtualRegister(RC); + unsigned t3 = MRI.createVirtualRegister(RC); + unsigned t4 = MRI.createVirtualRegister(RC); + unsigned PhyReg = getX86SubSuperRegister(X86::EAX, VT); unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT); unsigned LOADOpc = getLoadOpcode(VT); @@ -12944,12 +12951,16 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, // For the atomic load-arith operator, we generate // // thisMBB: - // EAX = LOAD [MI.addr] + // t1 = LOAD [MI.addr] // mainMBB: + // t4 = phi(t1 / thisMBB, t3 / mainMBB) // t1 = OP MI.val, EAX + // EAX = t4 // LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined] + // t3 = EAX // JNE mainMBB // sinkMBB: + // dst = t3 MachineBasicBlock *thisMBB = MBB; MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); @@ -12965,23 +12976,34 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); // thisMBB: - MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), AccPhyReg); - for (unsigned i = 0; i < X86::AddrNumOperands; ++i) - MIB.addOperand(MI->getOperand(MemOpndSlot + i)); - MIB.setMemRefs(MMOBegin, MMOEnd); + MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { + MachineOperand NewMO = MI->getOperand(MemOpndSlot + i); + if (NewMO.isReg()) + NewMO.setIsKill(false); + MIB.addOperand(NewMO); + } + for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) { + unsigned flags = (*MMOI)->getFlags(); + flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad; + MachineMemOperand *MMO = + MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags, + (*MMOI)->getSize(), + (*MMOI)->getBaseAlignment(), + (*MMOI)->getTBAAInfo(), + (*MMOI)->getRanges()); + MIB.addMemOperand(MMO); + } thisMBB->addSuccessor(mainMBB); // mainMBB: MachineBasicBlock *origMainMBB = mainMBB; - mainMBB->addLiveIn(AccPhyReg); - // Copy AccPhyReg as it is used more than once. - unsigned AccReg = MRI.createVirtualRegister(RC); - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccReg) - .addReg(AccPhyReg); + // Add a PHI. 
+ BuildMI(mainMBB, DL, TII->get(X86::PHI), t4) + .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB); - unsigned t1 = MRI.createVirtualRegister(RC); unsigned Opc = MI->getOpcode(); switch (Opc) { default: @@ -12999,20 +13021,20 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, case X86::ATOMXOR32: case X86::ATOMXOR64: { unsigned ARITHOpc = getNonAtomicOpcode(Opc); - BuildMI(mainMBB, DL, TII->get(ARITHOpc), t1).addReg(SrcReg) - .addReg(AccReg); + BuildMI(mainMBB, DL, TII->get(ARITHOpc), t2).addReg(SrcReg) + .addReg(t4); break; } case X86::ATOMNAND8: case X86::ATOMNAND16: case X86::ATOMNAND32: case X86::ATOMNAND64: { - unsigned t2 = MRI.createVirtualRegister(RC); + unsigned Tmp = MRI.createVirtualRegister(RC); unsigned NOTOpc; unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc); - BuildMI(mainMBB, DL, TII->get(ANDOpc), t2).addReg(SrcReg) - .addReg(AccReg); - BuildMI(mainMBB, DL, TII->get(NOTOpc), t1).addReg(t2); + BuildMI(mainMBB, DL, TII->get(ANDOpc), Tmp).addReg(SrcReg) + .addReg(t4); + BuildMI(mainMBB, DL, TII->get(NOTOpc), t2).addReg(Tmp); break; } case X86::ATOMMAX8: @@ -13036,20 +13058,22 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, BuildMI(mainMBB, DL, TII->get(CMPOpc)) .addReg(SrcReg) - .addReg(AccReg); + .addReg(t4); if (Subtarget->hasCMov()) { if (VT != MVT::i8) { // Native support - BuildMI(mainMBB, DL, TII->get(CMOVOpc), t1) + BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2) .addReg(SrcReg) - .addReg(AccReg); + .addReg(t4); } else { // Promote i8 to i32 to use CMOV32 - const TargetRegisterClass *RC32 = getRegClassFor(MVT::i32); + const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo(); + const TargetRegisterClass *RC32 = + TRI->getSubClassWithSubReg(getRegClassFor(MVT::i32), X86::sub_8bit); unsigned SrcReg32 = MRI.createVirtualRegister(RC32); unsigned AccReg32 = MRI.createVirtualRegister(RC32); - unsigned t2 = MRI.createVirtualRegister(RC32); + unsigned Tmp = MRI.createVirtualRegister(RC32); unsigned Undef = MRI.createVirtualRegister(RC32); BuildMI(mainMBB, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Undef); @@ -13060,15 +13084,15 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, .addImm(X86::sub_8bit); BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), AccReg32) .addReg(Undef) - .addReg(AccReg) + .addReg(t4) .addImm(X86::sub_8bit); - BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2) + BuildMI(mainMBB, DL, TII->get(CMOVOpc), Tmp) .addReg(SrcReg32) .addReg(AccReg32); - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t1) - .addReg(t2, 0, X86::sub_8bit); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t2) + .addReg(Tmp, 0, X86::sub_8bit); } } else { // Use pseudo select and lower them. @@ -13077,8 +13101,8 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, unsigned SelOpc = getPseudoCMOVOpc(VT); X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc); assert(CC != X86::COND_INVALID && "Invalid atomic-load-op transformation!"); - MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t1) - .addReg(SrcReg).addReg(AccReg) + MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t2) + .addReg(SrcReg).addReg(t4) .addImm(CC); mainMBB = EmitLoweredSelect(MIB, mainMBB); } @@ -13086,27 +13110,33 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, } } - // Copy AccPhyReg back from virtual register. - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccPhyReg) - .addReg(AccReg); + // Copy PhyReg back from virtual register. 
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), PhyReg) + .addReg(t4); MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc)); - for (unsigned i = 0; i < X86::AddrNumOperands; ++i) - MIB.addOperand(MI->getOperand(MemOpndSlot + i)); - MIB.addReg(t1); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { + MachineOperand NewMO = MI->getOperand(MemOpndSlot + i); + if (NewMO.isReg()) + NewMO.setIsKill(false); + MIB.addOperand(NewMO); + } + MIB.addReg(t2); MIB.setMemRefs(MMOBegin, MMOEnd); + // Copy PhyReg back to virtual register. + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3) + .addReg(PhyReg); + BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB); mainMBB->addSuccessor(origMainMBB); mainMBB->addSuccessor(sinkMBB); // sinkMBB: - sinkMBB->addLiveIn(AccPhyReg); - BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg) - .addReg(AccPhyReg); + .addReg(t3); MI->eraseFromParent(); return sinkMBB; @@ -13123,15 +13153,24 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, // to // // ... -// EAX = LOAD [MI.addr + 0] -// EDX = LOAD [MI.addr + 4] +// t1L = LOAD [MI.addr + 0] +// t1H = LOAD [MI.addr + 4] // loop: -// EBX = OP MI.val.lo, EAX -// ECX = OP MI.val.hi, EDX +// t4L = phi(t1L, t3L / loop) +// t4H = phi(t1H, t3H / loop) +// t2L = OP MI.val.lo, t4L +// t2H = OP MI.val.hi, t4H +// EAX = t4L +// EDX = t4H +// EBX = t2L +// ECX = t2H // LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined] +// t3L = EAX +// t3H = EDX // JNE loop // sink: -// dst = EDX:EAX +// dstL = t3L +// dstH = t3H // ... MachineBasicBlock * X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, @@ -13172,20 +13211,37 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, const TargetRegisterClass *RC = &X86::GR32RegClass; const TargetRegisterClass *RC8 = &X86::GR8RegClass; + unsigned t1L = MRI.createVirtualRegister(RC); + unsigned t1H = MRI.createVirtualRegister(RC); + unsigned t2L = MRI.createVirtualRegister(RC); + unsigned t2H = MRI.createVirtualRegister(RC); + unsigned t3L = MRI.createVirtualRegister(RC); + unsigned t3H = MRI.createVirtualRegister(RC); + unsigned t4L = MRI.createVirtualRegister(RC); + unsigned t4H = MRI.createVirtualRegister(RC); + unsigned LCMPXCHGOpc = X86::LCMPXCHG8B; unsigned LOADOpc = X86::MOV32rm; // For the atomic load-arith operator, we generate // // thisMBB: - // EAX = LOAD [MI.addr + 0] - // EDX = LOAD [MI.addr + 4] + // t1L = LOAD [MI.addr + 0] + // t1H = LOAD [MI.addr + 4] // mainMBB: - // EBX = OP MI.vallo, EAX - // ECX = OP MI.valhi, EDX + // t4L = phi(t1L / thisMBB, t3L / mainMBB) + // t4H = phi(t1H / thisMBB, t3H / mainMBB) + // t2L = OP MI.val.lo, t4L + // t2H = OP MI.val.hi, t4H + // EBX = t2L + // ECX = t2H // LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined] - // JNE mainMBB + // t3L = EAX + // t3H = EDX + // JNE loop // sinkMBB: + // dstL = t3L + // dstH = t3H MachineBasicBlock *thisMBB = MBB; MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); @@ -13202,35 +13258,50 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, // thisMBB: // Lo - MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EAX); - for (unsigned i = 0; i < X86::AddrNumOperands; ++i) - MIB.addOperand(MI->getOperand(MemOpndSlot + i)); - MIB.setMemRefs(MMOBegin, MMOEnd); + MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1L); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { + MachineOperand NewMO = MI->getOperand(MemOpndSlot + i); + if 
(NewMO.isReg()) + NewMO.setIsKill(false); + MIB.addOperand(NewMO); + } + for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) { + unsigned flags = (*MMOI)->getFlags(); + flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad; + MachineMemOperand *MMO = + MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags, + (*MMOI)->getSize(), + (*MMOI)->getBaseAlignment(), + (*MMOI)->getTBAAInfo(), + (*MMOI)->getRanges()); + MIB.addMemOperand(MMO); + }; + MachineInstr *LowMI = MIB; + // Hi - MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EDX); + MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1H); for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { - if (i == X86::AddrDisp) + if (i == X86::AddrDisp) { MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32) - else - MIB.addOperand(MI->getOperand(MemOpndSlot + i)); + } else { + MachineOperand NewMO = MI->getOperand(MemOpndSlot + i); + if (NewMO.isReg()) + NewMO.setIsKill(false); + MIB.addOperand(NewMO); + } } - MIB.setMemRefs(MMOBegin, MMOEnd); + MIB.setMemRefs(LowMI->memoperands_begin(), LowMI->memoperands_end()); thisMBB->addSuccessor(mainMBB); // mainMBB: MachineBasicBlock *origMainMBB = mainMBB; - mainMBB->addLiveIn(X86::EAX); - mainMBB->addLiveIn(X86::EDX); - // Copy EDX:EAX as they are used more than once. - unsigned LoReg = MRI.createVirtualRegister(RC); - unsigned HiReg = MRI.createVirtualRegister(RC); - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), LoReg).addReg(X86::EAX); - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), HiReg).addReg(X86::EDX); - - unsigned t1L = MRI.createVirtualRegister(RC); - unsigned t1H = MRI.createVirtualRegister(RC); + // Add PHIs. + BuildMI(mainMBB, DL, TII->get(X86::PHI), t4L) + .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB); + BuildMI(mainMBB, DL, TII->get(X86::PHI), t4H) + .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB); unsigned Opc = MI->getOpcode(); switch (Opc) { @@ -13243,19 +13314,23 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, case X86::ATOMSUB6432: { unsigned HiOpc; unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc); - BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(LoReg).addReg(SrcLoReg); - BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(HiReg).addReg(SrcHiReg); + BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(t4L) + .addReg(SrcLoReg); + BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(t4H) + .addReg(SrcHiReg); break; } case X86::ATOMNAND6432: { unsigned HiOpc, NOTOpc; unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc); - unsigned t2L = MRI.createVirtualRegister(RC); - unsigned t2H = MRI.createVirtualRegister(RC); - BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg).addReg(LoReg); - BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg).addReg(HiReg); - BuildMI(mainMBB, DL, TII->get(NOTOpc), t1L).addReg(t2L); - BuildMI(mainMBB, DL, TII->get(NOTOpc), t1H).addReg(t2H); + unsigned TmpL = MRI.createVirtualRegister(RC); + unsigned TmpH = MRI.createVirtualRegister(RC); + BuildMI(mainMBB, DL, TII->get(LoOpc), TmpL).addReg(SrcLoReg) + .addReg(t4L); + BuildMI(mainMBB, DL, TII->get(HiOpc), TmpH).addReg(SrcHiReg) + .addReg(t4H); + BuildMI(mainMBB, DL, TII->get(NOTOpc), t2L).addReg(TmpL); + BuildMI(mainMBB, DL, TII->get(NOTOpc), t2H).addReg(TmpH); break; } case X86::ATOMMAX6432: @@ -13271,12 +13346,12 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, unsigned cc = MRI.createVirtualRegister(RC); // cl := cmp src_lo, lo BuildMI(mainMBB, DL, 
TII->get(X86::CMP32rr)) - .addReg(SrcLoReg).addReg(LoReg); + .addReg(SrcLoReg).addReg(t4L); BuildMI(mainMBB, DL, TII->get(LoOpc), cL); BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL); // ch := cmp src_hi, hi BuildMI(mainMBB, DL, TII->get(X86::CMP32rr)) - .addReg(SrcHiReg).addReg(HiReg); + .addReg(SrcHiReg).addReg(t4H); BuildMI(mainMBB, DL, TII->get(HiOpc), cH); BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH); // cc := if (src_hi == hi) ? cl : ch; @@ -13291,17 +13366,17 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, } BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc); if (Subtarget->hasCMov()) { - BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1L) - .addReg(SrcLoReg).addReg(LoReg); - BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1H) - .addReg(SrcHiReg).addReg(HiReg); + BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2L) + .addReg(SrcLoReg).addReg(t4L); + BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2H) + .addReg(SrcHiReg).addReg(t4H); } else { - MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1L) - .addReg(SrcLoReg).addReg(LoReg) + MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2L) + .addReg(SrcLoReg).addReg(t4L) .addImm(X86::COND_NE); mainMBB = EmitLoweredSelect(MIB, mainMBB); - MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1H) - .addReg(SrcHiReg).addReg(HiReg) + MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2H) + .addReg(SrcHiReg).addReg(t4H) .addImm(X86::COND_NE); mainMBB = EmitLoweredSelect(MIB, mainMBB); } @@ -13310,39 +13385,44 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, case X86::ATOMSWAP6432: { unsigned HiOpc; unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc); - BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(SrcLoReg); - BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(SrcHiReg); + BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg); + BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg); break; } } // Copy EDX:EAX back from HiReg:LoReg - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(LoReg); - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(HiReg); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(t4L); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(t4H); // Copy ECX:EBX from t1H:t1L - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t1L); - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t1H); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t2L); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t2H); MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc)); - for (unsigned i = 0; i < X86::AddrNumOperands; ++i) - MIB.addOperand(MI->getOperand(MemOpndSlot + i)); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { + MachineOperand NewMO = MI->getOperand(MemOpndSlot + i); + if (NewMO.isReg()) + NewMO.setIsKill(false); + MIB.addOperand(NewMO); + } MIB.setMemRefs(MMOBegin, MMOEnd); + // Copy EDX:EAX back to t3H:t3L + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3L).addReg(X86::EAX); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3H).addReg(X86::EDX); + BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB); mainMBB->addSuccessor(origMainMBB); mainMBB->addSuccessor(sinkMBB); // sinkMBB: - sinkMBB->addLiveIn(X86::EAX); - sinkMBB->addLiveIn(X86::EDX); - BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(TargetOpcode::COPY), DstLoReg) - .addReg(X86::EAX); + 
.addReg(t3L); BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(TargetOpcode::COPY), DstHiReg) - .addReg(X86::EDX); + .addReg(t3H); MI->eraseFromParent(); return sinkMBB; -- cgit v1.2.3-18-g5258 From fe9dbe0066ba4a35cb68fb281e6f6649ffa85ae9 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Thu, 7 Mar 2013 01:01:29 +0000 Subject: Fix two remaining issue after fixing PR15355 when CMOV is not available - Phi nodes should be replaced/updated after lowering CMOV into branch because 'mainMBB' updating operand in Phi node is changed. - Add EFLAGS in livein before lowering the 2nd CMOV. It's necessary as we will reuse the EFLAGS generated before the 1st lowered CMOV, which won't clobber EFLAGS. However, we need explicitly specify that. - '-attr=-cmov' test case are added. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176598 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0952350d07..9727de8203 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -13001,8 +13001,8 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, MachineBasicBlock *origMainMBB = mainMBB; // Add a PHI. - BuildMI(mainMBB, DL, TII->get(X86::PHI), t4) - .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB); + MachineInstr *Phi = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4) + .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB); unsigned Opc = MI->getOpcode(); switch (Opc) { @@ -13105,6 +13105,11 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, .addReg(SrcReg).addReg(t4) .addImm(CC); mainMBB = EmitLoweredSelect(MIB, mainMBB); + // Replace the original PHI node as mainMBB is changed after CMOV + // lowering. + BuildMI(*origMainMBB, Phi, DL, TII->get(X86::PHI), t4) + .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB); + Phi->eraseFromParent(); } break; } @@ -13298,10 +13303,10 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, MachineBasicBlock *origMainMBB = mainMBB; // Add PHIs. - BuildMI(mainMBB, DL, TII->get(X86::PHI), t4L) - .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB); - BuildMI(mainMBB, DL, TII->get(X86::PHI), t4H) - .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB); + MachineInstr *PhiL = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4L) + .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB); + MachineInstr *PhiH = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4H) + .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB); unsigned Opc = MI->getOpcode(); switch (Opc) { @@ -13375,10 +13380,21 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, .addReg(SrcLoReg).addReg(t4L) .addImm(X86::COND_NE); mainMBB = EmitLoweredSelect(MIB, mainMBB); + // As the lowered CMOV won't clobber EFLAGS, we could reuse it for the + // 2nd CMOV lowering. + mainMBB->addLiveIn(X86::EFLAGS); MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2H) .addReg(SrcHiReg).addReg(t4H) .addImm(X86::COND_NE); mainMBB = EmitLoweredSelect(MIB, mainMBB); + // Replace the original PHI node as mainMBB is changed after CMOV + // lowering. 
From 02c2ecf9f166522cc1c58dd484668c1cbacc0c6e Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Thu, 7 Mar 2013 18:48:40 +0000
Subject: X86: Fold EXTRACT_SUBVECTORs of a BUILD_VECTOR into a smaller
 BUILD_VECTOR.

That can usually be lowered efficiently and is common in Sandy Bridge code.
It would be nice to do this in DAGCombiner but we can't insert arbitrary
BUILD_VECTORs this late.

Fixes PR15462.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176634 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9727de8203..b19f2f66be 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -85,6 +85,11 @@ static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
   unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())
                                / 128) * ElemsPerChunk);
 
+  // If the input is a buildvector just emit a smaller one.
+  if (Vec.getOpcode() == ISD::BUILD_VECTOR)
+    return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
+                       Vec->op_begin()+NormalizedIdxVal, ElemsPerChunk);
+
   SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
   SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
                                VecIdx);
-- 
cgit v1.2.3-18-g5258
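A concrete way to see the fold from the source side (the example is mine; the commit itself only touches Extract128BitVector): with AVX, building a 256-bit vector and immediately taking one 128-bit lane previously materialized all eight elements before the extract, whereas the DAG can now emit a four-element BUILD_VECTOR for the requested lane directly.

// Sketch, compile with -mavx: the 256-bit BUILD_VECTOR feeding an
// EXTRACT_SUBVECTOR of the low lane can be shrunk to a 128-bit
// BUILD_VECTOR of elements a..d.
#include <immintrin.h>

__m128 lowLane(float a, float b, float c, float d,
               float e, float f, float g, float h) {
  __m256 V = _mm256_set_ps(h, g, f, e, d, c, b, a); // BUILD_VECTOR (8 x f32)
  return _mm256_extractf128_ps(V, 0);               // EXTRACT_SUBVECTOR, lane 0
}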
From 3ef5383b3537a420c5e2ab3e657c378e5185549d Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Fri, 8 Mar 2013 15:36:57 +0000
Subject: DAGCombiner: Use correct value type for checking legality of BR_CC v3

LegalizeDAG.cpp uses the value type of the comparison operands when checking
the legality of BR_CC, so DAGCombiner should do the same.

v2:
  - Expand more BR_CC value types for NVPTX

v3:
  - Expand correct BR_CC value types for Hexagon, Mips, and XCore.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176694 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b19f2f66be..960870dc60 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -376,7 +376,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
   setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
-  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
+  setOperationAction(ISD::BR_CC            , MVT::f32,   Expand);
+  setOperationAction(ISD::BR_CC            , MVT::f64,   Expand);
+  setOperationAction(ISD::BR_CC            , MVT::f80,   Expand);
+  setOperationAction(ISD::BR_CC            , MVT::i8,    Expand);
+  setOperationAction(ISD::BR_CC            , MVT::i16,   Expand);
+  setOperationAction(ISD::BR_CC            , MVT::i32,   Expand);
+  setOperationAction(ISD::BR_CC            , MVT::i64,   Expand);
   setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
   if (Subtarget->is64Bit())
     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32,   Legal);
-- 
cgit v1.2.3-18-g5258


From 12dccaed9c0368f4f5ef4312c32b375c725c9daf Mon Sep 17 00:00:00 2001
From: Kevin Enderby
Date: Mon, 11 Mar 2013 21:17:13 +0000
Subject: Fixes disassembler crashes on 2013 Haswell RTM instructions.

rdar://13318048

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176828 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrTSX.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Target/X86')

diff --git a/lib/Target/X86/X86InstrTSX.td b/lib/Target/X86/X86InstrTSX.td
index ad55058ede..a37a8cc744 100644
--- a/lib/Target/X86/X86InstrTSX.td
+++ b/lib/Target/X86/X86InstrTSX.td
@@ -22,7 +22,7 @@ def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins),
 let isBranch = 1, isTerminator = 1, Defs = [EAX] in
 def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget:$dst),
-                 "xbegin\t$dst", []>;
+                 "xbegin\t$dst", []>, Requires<[HasRTM]>;
 
 def XEND : I<0x01, MRM_D5, (outs), (ins),
              "xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>;
-- 
cgit v1.2.3-18-g5258
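For context on the instructions the TSX patch above predicates on HasRTM, here is a usage sketch of mine, not code from the commit. It uses the RTM intrinsics from <immintrin.h>, requires -mrtm, and real code should check CPUID for RTM support before executing the first xbegin.

#include <immintrin.h>

// Returns true if the increment committed inside a hardware transaction.
// On abort, the CPU rolls back the region and resumes after _xbegin with a
// status code in EAX (note Defs = [EAX] on XBEGIN_4 above); the relative
// abort target is the operand the fixed disassembler now decodes.
bool transactionalIncrement(int *P) {
  if (_xbegin() == _XBEGIN_STARTED) { // emits XBEGIN (opcode 0xC7, MRM_F8)
    ++*P;                             // transactional region
    _xend();                          // commit with XEND
    return true;
  }
  return false;                       // aborted; caller should take a lock
}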