diff options
author | Nate Begeman <natebegeman@mac.com> | 2008-02-12 22:51:28 +0000 |
---|---|---|
committer | Nate Begeman <natebegeman@mac.com> | 2008-02-12 22:51:28 +0000 |
commit | cdd1eeca2c2da2e1c9b48e04f2f779ffe5cf3666 (patch) | |
tree | 4d4c204615e0945fb993fbf3ef9104fa935cf9df | |
parent | ffe3e2514a2550152267072ed7afeebb3dd3016c (diff) |
SSE4.1 64b integer insert/extract pattern support
Move formats into the formats file
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@47035 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 13 | ||||
-rw-r--r-- | lib/Target/X86/X86Instr64bit.td | 43 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFormats.td | 29 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 29 |
4 files changed, 73 insertions, 41 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ab970e556f..3e85538d9e 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -636,7 +636,6 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); - // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones. setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); // Custom lower build_vector, vector_shuffle, and extract_vector_elt. @@ -652,9 +651,12 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); - if (Subtarget->is64Bit()) + if (Subtarget->is64Bit()) { + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); + } // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { @@ -698,11 +700,8 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); if (Subtarget->is64Bit()) { - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); - - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal); } } diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 9895853b28..5c7f093f6a 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -1271,8 +1271,41 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), // X86-64 SSE4.1 Instructions //===----------------------------------------------------------------------===// -// PEXTRB, unary, TA, 0x14, REX.W -// PEXTRW, unary, TA, 0x15, REX.W -// PEXTRQ, unary, TA, 0x16, REX.W -// EXTRACTPS, unary, TA, 0x17, REX.W -// PINSRQ, 2addr, binary, TA, 0x22, REX.W +/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination +multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> { + def rr : SS4AI<opc, MRMSrcReg, (outs GR64:$dst), + (ins VR128:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set GR64:$dst, + (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W; + def mr : SS4AI<opc, MRMDestMem, (outs), + (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(store (extractelt (v2i64 VR128:$src1), imm:$src2), + addr:$dst)]>, OpSize, REX_W; +} + +defm PEXTRQ : SS41I_extract64<0x16, "pextrq">; + +let isTwoAddress = 1 in { + multiclass SS41I_insert64<bits<8> opc, string OpcodeStr> { + def rr : SS4AI<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR128:$dst, + (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>, + OpSize, REX_W; + def rm : SS4AI<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR128:$dst, + (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), + imm:$src3)))]>, OpSize, REX_W; + } +} + +defm PINSRQ : SS41I_insert64<0x22, "pinsrq">; diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 3767e27747..86a327d64f 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -185,6 +185,35 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE3]>; +// SSSE3 Instruction Templates: +// +// SS38I - SSSE3 instructions with T8 prefix. +// SS3AI - SSSE3 instructions with TA prefix. +// +// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version +// uses the MMX registers. We put those instructions here because they better +// fit into the SSSE3 instruction category rather than the MMX category. + +class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSSE3]>; +class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSSE3]>; + +// SSE4.1 Instruction Templates: +// +// SS48I - SSE 4.1 instructions with T8 prefix. +// SS41AI - SSE 4.1 instructions with TA prefix. +// +class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE41]>; +class SS4AI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE41]>; + + // X86-64 Instruction templates... // diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index e50716b2c2..183ee2c69f 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2434,22 +2434,6 @@ let AddedComplexity = 20 in // SSSE3 Instructions //===----------------------------------------------------------------------===// -// SSSE3 Instruction Templates: -// -// SS38I - SSSE3 instructions with T8 prefix. -// SS3AI - SSSE3 instructions with TA prefix. -// -// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version -// uses the MMX registers. We put those instructions here because they better -// fit into the SSSE3 instruction category rather than the MMX category. - -class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSSE3]>; -class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSSE3]>; - /// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8. multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr, Intrinsic IntId64, Intrinsic IntId128> { @@ -3038,19 +3022,6 @@ def : Pat<(store (v16i8 VR128:$src), addr:$dst), // SSE4.1 Instructions //===----------------------------------------------------------------------===// -// SSE4.1 Instruction Templates: -// -// SS48I - SSE 4.1 instructions with T8 prefix. -// SS41AI - SSE 4.1 instructions with TA prefix. -// -class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE41]>; -class SS4AI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE41]>; - - multiclass sse41_fp_unop_rm<bits<8> opcss, bits<8> opcps, bits<8> opcsd, bits<8> opcpd, string OpcodeStr, |