aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruno Cardoso Lopes <bruno.cardoso@gmail.com>2011-11-25 19:33:42 +0000
committerBruno Cardoso Lopes <bruno.cardoso@gmail.com>2011-11-25 19:33:42 +0000
commit1b9b377975b3f437acef8c2ba90de582add52f65 (patch)
tree3b924903b539999e3fcba3bd52254b3331f6d4e7
parent70aaf37c11bbfffc8d3e007556da46c810e822a3 (diff)
This patch contains support for encoding FMA4 instructions and
tablegen patterns for scalar FMA4 operations and intrinsic. Also add tests for vfmaddsd. Patch by Jan Sjodin git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145133 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/IntrinsicsX86.td10
-rw-r--r--lib/Target/X86/MCTargetDesc/X86BaseInfo.h10
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp34
-rw-r--r--lib/Target/X86/X86InstrFMA.td39
-rw-r--r--lib/Target/X86/X86InstrFormats.td10
-rw-r--r--test/CodeGen/X86/fma4-intrinsics-x86_64.ll9
-rw-r--r--test/MC/X86/x86_64-fma4-encoding.s13
7 files changed, 118 insertions, 7 deletions
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index cba6599d66..ea7597ca2c 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -1822,6 +1822,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
}
//===----------------------------------------------------------------------===//
+// FMA4
+
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_fma4_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
// MMX
// Empty MMX state op.
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index c50f785172..213a79d670 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -418,7 +418,12 @@ namespace X86II {
/// storing a classifier in the imm8 field. To simplify our implementation,
/// we handle this by storeing the classifier in the opcode field and using
/// this flag to indicate that the encoder should do the wacky 3DNow! thing.
- Has3DNow0F0FOpcode = 1U << 7
+ Has3DNow0F0FOpcode = 1U << 7,
+
+ /// XOP_W - Same bit as VEX_W. Used to indicate swapping of
+ /// operand 3 and 4 to be encoded in ModRM or I8IMM. This is used
+ /// for FMA4 and XOP instructions.
+ XOP_W = 1U << 8
};
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
@@ -488,9 +493,12 @@ namespace X86II {
return 0;
case X86II::MRMSrcMem: {
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ bool HasXOP_W = (TSFlags >> X86II::VEXShift) & X86II::XOP_W;
unsigned FirstMemOp = 1;
if (HasVEX_4V)
++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
+ if (HasXOP_W)
+ ++FirstMemOp;// Skip the register source (which is encoded in I8IMM).
// FIXME: Maybe lea should have its own form? This is a horrible hack.
//if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 1ab469cc00..dbd52078d8 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -415,6 +415,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// opcode extension, or ignored, depending on the opcode byte)
unsigned char VEX_W = 0;
+ // XOP_W: opcode specific, same bit as VEX_W, but used to
+ // swap operand 3 and 4 for FMA4 and XOP instructions
+ unsigned char XOP_W = 0;
+
// VEX_5M (VEX m-mmmmm field):
//
// 0b00000: Reserved for future use
@@ -453,6 +457,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
VEX_W = 1;
+ if ((TSFlags >> X86II::VEXShift) & X86II::XOP_W)
+ XOP_W = 1;
+
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
VEX_L = 1;
@@ -529,6 +536,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// src1(ModR/M), MemAddr, imm8
// src1(ModR/M), MemAddr, src2(VEX_I8IMM)
//
+ // FMA4:
+ // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
VEX_R = 0x0;
@@ -629,7 +639,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// 3 byte VEX prefix
EmitByte(0xC4, CurByte, OS);
EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
- EmitByte(LastByte | (VEX_W << 7), CurByte, OS);
+ EmitByte(LastByte | ((VEX_W | XOP_W) << 7), CurByte, OS);
}
/// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64
@@ -889,6 +899,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// It uses the VEX.VVVV field?
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasXOP_W = (TSFlags >> X86II::VEXShift) & X86II::XOP_W;
+ unsigned XOP_W_I8IMMOperand = 2;
// Determine where the memory operand starts, if present.
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
@@ -961,6 +973,10 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
SrcRegNum++;
+ // GAS sets the XOP_W even with register operands, we want to match this.
+ // XOP_W is ignored, so there is no swapping of the operands
+ XOP_W_I8IMMOperand = 3;
+
EmitRegModRMByte(MI.getOperand(SrcRegNum),
GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
CurOp = SrcRegNum + 1;
@@ -975,14 +991,20 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
++AddrOperands;
++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
}
+ if(HasXOP_W) // Skip second register source (encoded in I8IMM)
+ ++FirstMemOp;
EmitByte(BaseOpcode, CurByte, OS);
EmitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)),
TSFlags, CurByte, OS, Fixups);
- CurOp += AddrOperands + 1;
- if (HasVEX_4VOp3)
- ++CurOp;
+ if(HasXOP_W) {
+ CurOp = NumOps - 1; // We have consumed all except one operand (third)
+ } else {
+ CurOp += AddrOperands + 1;
+ if (HasVEX_4VOp3)
+ ++CurOp;
+ }
break;
}
@@ -1064,7 +1086,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// The last source register of a 4 operand instruction in AVX is encoded
// in bits[7:4] of a immediate byte, and bits[3:0] are ignored.
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
- const MCOperand &MO = MI.getOperand(CurOp++);
+ const MCOperand &MO = MI.getOperand(HasXOP_W ? XOP_W_I8IMMOperand
+ : CurOp);
+ CurOp++;
bool IsExtReg = X86II::isX86_64ExtendedReg(MO.getReg());
unsigned RegNum = (IsExtReg ? (1 << 7) : 0);
RegNum |= GetX86RegNum(MO) << 4;
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index d868773d2d..bdf797d5e1 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -58,3 +58,42 @@ let isAsmParserOnly = 1 in {
defm VFNMSUBPS : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">;
defm VFNMSUBPD : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W;
}
+
+//===----------------------------------------------------------------------===//
+// FMA4 - AMD 4 operand Fused Multiply-Add instructions
+//===----------------------------------------------------------------------===//
+
+
+multiclass fma4s<bits<8> opc, string OpcodeStr> {
+ def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src3, $src1, $dst|$dst, $src1, $src3, $src2}"),
+ []>, XOP_W;
+ def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_W;
+ def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>;
+
+}
+
+let isAsmParserOnly = 1 in {
+ defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd">;
+}
+
+// FMA4 Intrinsics patterns
+
+def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMADDSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFMADDSD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFMADDSD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index ecd6a93ef0..08c56c2e69 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -118,7 +118,7 @@ class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
class VEX_L { bit hasVEX_L = 1; }
class VEX_LIG { bit ignoresVEX_L = 1; }
class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; }
-
+class XOP_W { bit hasXOP_WPrefix = 1; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string AsmStr, Domain d = GenericDomain>
: Instruction {
@@ -158,6 +158,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit
bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding?
+ bit hasXOP_WPrefix = 0; // Same bit as VEX_W, but used for swapping operands
// TSFlags layout should be kept in sync with X86InstrInfo.h.
let TSFlags{5-0} = FormBits;
@@ -179,6 +180,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{38} = hasVEX_L;
let TSFlags{39} = ignoresVEX_L;
let TSFlags{40} = has3DNow0F0FOpcode;
+ let TSFlags{41} = hasXOP_WPrefix;
}
class PseudoI<dag oops, dag iops, list<dag> pattern>
@@ -496,6 +498,12 @@ class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
OpSize, VEX_4V, Requires<[HasFMA3]>;
+// FMA4 Instruction Templates
+class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>;
+
// X86-64 Instruction templates...
//
diff --git a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
new file mode 100644
index 0000000000..39c2311eb5
--- /dev/null
+++ b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=fma4 | FileCheck %s
+
+define < 2 x double > @test_x86_fma4_vfmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+ ; CHECK: vfmaddsd
+ %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
diff --git a/test/MC/X86/x86_64-fma4-encoding.s b/test/MC/X86/x86_64-fma4-encoding.s
new file mode 100644
index 0000000000..e0d2602901
--- /dev/null
+++ b/test/MC/X86/x86_64-fma4-encoding.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: vfmaddsd (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0x01,0x10]
+ vfmaddsd (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddsd %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6b,0x01,0x10]
+ vfmaddsd %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10]
+ vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0