aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/X86/X86Subtarget.cpp
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2009-12-18 07:40:29 +0000
committerEvan Cheng <evan.cheng@apple.com>2009-12-18 07:40:29 +0000
commit400073d5467b79534d8c63b0d996a55e4252ff4b (patch)
treef7204e84da8877e7b062f05bcb1878a05108b44e /lib/Target/X86/X86Subtarget.cpp
parent3a5d409f3c2eccf1d1f0a4616023760829a4db67 (diff)
On recent Intel u-arch's, folding loads into some unary SSE instructions can
be non-optimal. To be precise, we should avoid folding loads if the instructions only update part of the destination register, and the non-updated part is not needed. e.g. cvtss2sd, sqrtss. Unfolding the load from these instructions breaks the partial register dependency and it can improve performance. e.g. movss (%rdi), %xmm0 cvtss2sd %xmm0, %xmm0 instead of cvtss2sd (%rdi), %xmm0 An alternative method to break dependency is to clear the register first. e.g. xorps %xmm0, %xmm0 cvtss2sd (%rdi), %xmm0 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@91672 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/X86Subtarget.cpp')
-rw-r--r--lib/Target/X86/X86Subtarget.cpp2
1 files changed, 2 insertions, 0 deletions
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 75cdbada1b..4db3fdb82e 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -266,6 +266,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
unsigned Model = 0;
DetectFamilyModel(EAX, Family, Model);
IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
+ BreakSSEDep = IsIntel;
GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
HasX86_64 = (EDX >> 29) & 0x1;
@@ -286,6 +287,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
, HasFMA3(false)
, HasFMA4(false)
, IsBTMemSlow(false)
+ , BreakSSEDep(false)
, DarwinVers(0)
, stackAlignment(8)
// FIXME: this is a known good value for Yonah. How about others?