about | summary | refs | log | tree | commit | diff
path: root/lib/Target/X86/X86InstrInfo.cpp
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@gmail.com> 2012-08-31 23:10:34 +0000
committer: Craig Topper <craig.topper@gmail.com> 2012-08-31 23:10:34 +0000
commit: dfb1e4babd2e825d951d42bcb45438b48c45b155 (patch)
tree: 15aa9e280d9f18f97e318c87a090ffa366f4b9b0 /lib/Target/X86/X86InstrInfo.cpp
parent: 42d619b8aed86b21514fccb63a01f91229b82462 (diff)
Mark FMA4 instructions as commutable and add them to the folding tables.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163035 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/X86InstrInfo.cpp')
-rw-r--r-- lib/Target/X86/X86InstrInfo.cpp 60
1 files changed, 60 insertions, 0 deletions
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 42ea012f58..4f3d824b4a 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1110,6 +1110,36 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, TB_ALIGN_32 },
{ X86::VPXORYrr, X86::VPXORYrm, TB_ALIGN_32 },
// FIXME: add AVX 256-bit foldable instructions
+
+ // FMA4 foldable patterns
+ { X86::VFMADDSS4rr, X86::VFMADDSS4mr, TB_ALIGN_16 },
+ { X86::VFMADDSD4rr, X86::VFMADDSD4mr, TB_ALIGN_16 },
+ { X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_16 },
+ { X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_16 },
+ { X86::VFMADDPS4rrY, X86::VFMADDPS4mrY, TB_ALIGN_32 },
+ { X86::VFMADDPD4rrY, X86::VFMADDPD4mrY, TB_ALIGN_32 },
+ { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_16 },
+ { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_16 },
+ { X86::VFNMADDPS4rrY, X86::VFNMADDPS4mrY, TB_ALIGN_32 },
+ { X86::VFNMADDPD4rrY, X86::VFNMADDPD4mrY, TB_ALIGN_32 },
+ { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, TB_ALIGN_16 },
+ { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, TB_ALIGN_16 },
+ { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_16 },
+ { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_16 },
+ { X86::VFMSUBPS4rrY, X86::VFMSUBPS4mrY, TB_ALIGN_32 },
+ { X86::VFMSUBPD4rrY, X86::VFMSUBPD4mrY, TB_ALIGN_32 },
+ { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_16 },
+ { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_16 },
+ { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4mrY, TB_ALIGN_32 },
+ { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4mrY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_16 },
+ { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_16 },
+ { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4mrY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4mrY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_16 },
+ { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_16 },
+ { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_32 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
@@ -1237,6 +1267,36 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_32 },
{ X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_32 },
{ X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 },
+
+ // FMA4 foldable patterns
+ { X86::VFMADDSS4rr, X86::VFMADDSS4rm, TB_ALIGN_16 },
+ { X86::VFMADDSD4rr, X86::VFMADDSD4rm, TB_ALIGN_16 },
+ { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_16 },
+ { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_16 },
+ { X86::VFMADDPS4rrY, X86::VFMADDPS4rmY, TB_ALIGN_32 },
+ { X86::VFMADDPD4rrY, X86::VFMADDPD4rmY, TB_ALIGN_32 },
+ { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_16 },
+ { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_16 },
+ { X86::VFNMADDPS4rrY, X86::VFNMADDPS4rmY, TB_ALIGN_32 },
+ { X86::VFNMADDPD4rrY, X86::VFNMADDPD4rmY, TB_ALIGN_32 },
+ { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, TB_ALIGN_16 },
+ { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, TB_ALIGN_16 },
+ { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_16 },
+ { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_16 },
+ { X86::VFMSUBPS4rrY, X86::VFMSUBPS4rmY, TB_ALIGN_32 },
+ { X86::VFMSUBPD4rrY, X86::VFMSUBPD4rmY, TB_ALIGN_32 },
+ { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_16 },
+ { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_16 },
+ { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4rmY, TB_ALIGN_32 },
+ { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4rmY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_16 },
+ { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_16 },
+ { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4rmY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4rmY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_16 },
+ { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_16 },
+ { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_32 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {