diff options
author | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2010-07-07 01:14:56 +0000 |
---|---|---|
committer | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2010-07-07 01:14:56 +0000 |
commit | 09df2ae0d056f846850732b4ec1ab49dee9791cc (patch) | |
tree | ee600205ce23efdf0487f48e72e2404e013fd446 | |
parent | 3c1482231290e4c180556e090475bd09e7a26480 (diff) |
Add AVX SSE4.1 insertps, ptest and movntdqa instructions
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107747 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 57 | ||||
-rw-r--r-- | test/MC/AsmParser/X86/x86_32-encoding.s | 20 | ||||
-rw-r--r-- | test/MC/AsmParser/X86/x86_64-encoding.s | 20 |
3 files changed, 79 insertions, 18 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index a1c25acb9c..72619ad171 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4602,33 +4602,49 @@ let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in // are optimized inserts that won't zero arbitrary elements in the destination // vector. The next one matches the intrinsic and could zero arbitrary elements // in the target vector. -let Constraints = "$src1 = $dst" in { - multiclass SS41I_insertf32<bits<8> opc, string OpcodeStr> { - def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>, +multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> { + def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>, OpSize; - def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (X86insrtps VR128:$src1, - (v4f32 (scalar_to_vector (loadf32 addr:$src2))), - imm:$src3))]>, OpSize; - } + def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (X86insrtps VR128:$src1, + (v4f32 (scalar_to_vector (loadf32 addr:$src2))), + imm:$src3))]>, OpSize; } -defm INSERTPS : SS41I_insertf32<0x21, "insertps">; +let Constraints = "$src1 = $dst" in + defm INSERTPS : SS41I_insertf32<0x21, "insertps">; +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in + defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V; def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>; // ptest instruction we'll lower to this in X86ISelLowering primarily from // the intel intrinsic that corresponds to this. +let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in { +def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>, + OpSize, VEX; +def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>, + OpSize, VEX; +} + let Defs = [EFLAGS] in { def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "ptest \t{$src2, $src1|$src1, $src2}", @@ -4640,6 +4656,11 @@ def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), OpSize; } +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in +def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vmovntdqa\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, + OpSize, VEX; def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movntdqa\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s index f0a879bb90..eae0f6584f 100644 --- a/test/MC/AsmParser/X86/x86_32-encoding.s +++ b/test/MC/AsmParser/X86/x86_32-encoding.s @@ -12102,3 +12102,23 @@ // CHECK: encoding: [0xc4,0xe3,0x69,0x22,0x28,0x07] vpinsrd $7, (%eax), %xmm2, %xmm5 +// CHECK: vinsertps $7, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0xca,0x07] + vinsertps $7, %xmm2, %xmm5, %xmm1 + +// CHECK: vinsertps $7, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0x08,0x07] + vinsertps $7, (%eax), %xmm5, %xmm1 + +// CHECK: vptest %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0xea] + vptest %xmm2, %xmm5 + +// CHECK: vptest (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0x10] + vptest (%eax), %xmm2 + +// CHECK: vmovntdqa (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x2a,0x10] + vmovntdqa (%eax), %xmm2 + diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s index 013e2a5271..29529f3901 100644 --- a/test/MC/AsmParser/X86/x86_64-encoding.s +++ b/test/MC/AsmParser/X86/x86_64-encoding.s @@ -2166,3 +2166,23 @@ pshufb CPI1_0(%rip), %xmm1 // CHECK: encoding: [0xc4,0x63,0x99,0x22,0x10,0x07] vpinsrq $7, (%rax), %xmm12, %xmm10 +// CHECK: vinsertps $7, %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x29,0x21,0xdc,0x07] + vinsertps $7, %xmm12, %xmm10, %xmm11 + +// CHECK: vinsertps $7, (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x63,0x29,0x21,0x18,0x07] + vinsertps $7, (%rax), %xmm10, %xmm11 + +// CHECK: vptest %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x17,0xd4] + vptest %xmm12, %xmm10 + +// CHECK: vptest (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x17,0x20] + vptest (%rax), %xmm12 + +// CHECK: vmovntdqa (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x2a,0x20] + vmovntdqa (%rax), %xmm12 + |