about | summary | refs | log | tree | commit | diff
path: root/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target')
-rw-r--r-- lib/Target/X86/Disassembler/X86Disassembler.cpp | 2
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp | 10
-rw-r--r-- lib/Target/X86/X86InstrFMA.td | 100
3 files changed, 76 insertions, 36 deletions
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 5039887e1a..a54d35dda2 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -379,6 +379,8 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
}
switch (type) {
+ case TYPE_XMM32:
+ case TYPE_XMM64:
case TYPE_XMM128:
mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4)));
return;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b2b30fe387..f9184f693d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1052,7 +1052,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
- if (Subtarget->hasFMA()) {
+ if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
setOperationAction(ISD::FMA, MVT::v8f32, Custom);
setOperationAction(ISD::FMA, MVT::v4f64, Custom);
setOperationAction(ISD::FMA, MVT::v4f32, Custom);
@@ -15606,7 +15606,8 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
EVT ScalarVT = VT.getScalarType();
- if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget->hasFMA())
+ if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
+ (!Subtarget->hasFMA() && !Subtarget->hasFMA4()))
return SDValue();
SDValue A = N->getOperand(0);
@@ -15628,9 +15629,10 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,
unsigned Opcode;
if (!NegMul)
- Opcode = (!NegC)? X86ISD::FMADD : X86ISD::FMSUB;
+ Opcode = (!NegC) ? X86ISD::FMADD : X86ISD::FMSUB;
else
- Opcode = (!NegC)? X86ISD::FNMADD : X86ISD::FNMSUB;
+ Opcode = (!NegC) ? X86ISD::FNMADD : X86ISD::FNMSUB;
+
return DAG.getNode(Opcode, dl, VT, A, B, C);
}
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index 445bbaa4c1..f99440d973 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -193,34 +193,57 @@ defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss,
//===----------------------------------------------------------------------===//
-multiclass fma4s<bits<8> opc, string OpcodeStr, Operand memop,
- ComplexPattern mem_cpat, Intrinsic Int> {
- def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
+multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, ValueType OpVT, SDNode OpNode,
+ PatFrag mem_frag> {
+ def rr : FMA4<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst,
- (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4;
- def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, memop:$src3),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, MemOp4;
+ def rm : FMA4<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst,
- (Int VR128:$src1, VR128:$src2, mem_cpat:$src3))]>, VEX_W, MemOp4;
- def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, memop:$src2, VR128:$src3),
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2,
+ (mem_frag addr:$src3)))]>, VEX_W, MemOp4;
+ def mr : FMA4<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst,
- (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>;
+ [(set RC:$dst,
+ (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>;
// For disassembler
let isCodeGenOnly = 1 in
- def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ def rr_REV : FMA4<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>;
}
+multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
+ ComplexPattern mem_cpat, Intrinsic Int> {
+ def rr_Int : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4;
+ def rm_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, memop:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst, (Int VR128:$src1, VR128:$src2,
+ mem_cpat:$src3))]>, VEX_W, MemOp4;
+ def mr_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, memop:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>;
+}
+
multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT128, ValueType OpVT256,
PatFrag ld_frag128, PatFrag ld_frag256> {
@@ -277,34 +300,47 @@ let isCodeGenOnly = 1 in {
let Predicates = [HasFMA4] in {
-defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", ssmem, sse_load_f32,
- int_x86_fma_vfmadd_ss>;
-defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", sdmem, sse_load_f64,
- int_x86_fma_vfmadd_sd>;
+defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>,
+ fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32,
+ int_x86_fma_vfmadd_ss>;
+defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>,
+ fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64,
+ int_x86_fma_vfmadd_sd>;
+defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>,
+ fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32,
+ int_x86_fma_vfmsub_ss>;
+defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>,
+ fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64,
+ int_x86_fma_vfmsub_sd>;
+defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
+ X86Fnmadd, loadf32>,
+ fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32,
+ int_x86_fma_vfnmadd_ss>;
+defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
+ X86Fnmadd, loadf64>,
+ fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64,
+ int_x86_fma_vfnmadd_sd>;
+defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
+ X86Fnmsub, loadf32>,
+ fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32,
+ int_x86_fma_vfnmsub_ss>;
+defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
+ X86Fnmsub, loadf64>,
+ fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
+ int_x86_fma_vfnmsub_sd>;
+
defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
memopv4f32, memopv8f32>;
defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
memopv2f64, memopv4f64>;
-defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", ssmem, sse_load_f32,
- int_x86_fma_vfmsub_ss>;
-defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", sdmem, sse_load_f64,
- int_x86_fma_vfmsub_sd>;
defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
memopv4f32, memopv8f32>;
defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
memopv2f64, memopv4f64>;
-defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", ssmem, sse_load_f32,
- int_x86_fma_vfnmadd_ss>;
-defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", sdmem, sse_load_f64,
- int_x86_fma_vfnmadd_sd>;
defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32,
memopv4f32, memopv8f32>;
defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64,
memopv2f64, memopv4f64>;
-defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", ssmem, sse_load_f32,
- int_x86_fma_vfnmsub_ss>;
-defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
- int_x86_fma_vfnmsub_sd>;
defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32,
memopv4f32, memopv8f32>;
defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64,