aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@gmail.com>2011-11-07 02:00:04 +0000
committerCraig Topper <craig.topper@gmail.com>2011-11-07 02:00:04 +0000
commit69f5df777819cf7a00975280b46b4ef9afa2f745 (patch)
tree524c8649c5aff0cdcc3005b7707f9896df8ac72f
parent9595ede514be4080eb8ff088b0d26cd09923cd2b (diff)
Add AVX2 VEXTRACTI128 and VINSERTI128 instructions. Fix VPERM2I128 to be qualified with HasAVX2 instead of HasAVX. Mark VINSERTF128 and VEXTRACTF128 as never having side effects.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143902 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/IntrinsicsX86.td13
-rw-r--r--lib/Target/X86/X86InstrSSE.td40
-rw-r--r--test/CodeGen/X86/avx2-intrinsics-x86.ll16
3 files changed, 65 insertions, 4 deletions
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index fc9525a37b..345c2803f4 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -1731,10 +1731,19 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx2_vperm2i128 : GCCBuiltin<"__builtin_ia32_permti256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
}
+// Vector extract and insert
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_vextracti128 : GCCBuiltin<"__builtin_ia32_extract128i256">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i64_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx2_vinserti128 : GCCBuiltin<"__builtin_ia32_insert128i256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+}
// Misc.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">,
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index de7326a280..fc36884bf1 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -7130,14 +7130,17 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//
+let neverHasSideEffects = 1 in {
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR128:$src2, i8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, VEX_4V;
+let mayLoad = 1 in
def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f128mem:$src2, i8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, VEX_4V;
+}
def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3),
(VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
@@ -7174,14 +7177,17 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
//===----------------------------------------------------------------------===//
// VEXTRACTF128 - Extract packed floating-point values
//
+let neverHasSideEffects = 1 in {
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
(ins VR256:$src1, i8imm:$src2),
"vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, VEX;
+let mayStore = 1 in
def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
(ins f128mem:$dst, VR256:$src1, i8imm:$src2),
"vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, VEX;
+}
def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
(VEXTRACTF128rr VR256:$src1, imm:$src2)>;
@@ -7514,16 +7520,46 @@ defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>,
//===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
//
-def VPERM2I128rr : AVXAIi8<0x46, MRMSrcReg, (outs VR256:$dst),
+def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst,
(int_x86_avx2_vperm2i128 VR256:$src1, VR256:$src2, imm:$src3))]>,
VEX_4V;
-def VPERM2I128rm : AVXAIi8<0x46, MRMSrcMem, (outs VR256:$dst),
+def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, i8imm:$src3),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst,
(int_x86_avx2_vperm2i128 VR256:$src1, (memopv4i64 addr:$src2),
imm:$src3))]>,
VEX_4V;
+
+//===----------------------------------------------------------------------===//
+// VINSERTI128 - Insert packed integer values
+//
+def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR128:$src2, i8imm:$src3),
+ "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ [(set VR256:$dst,
+ (int_x86_avx2_vinserti128 VR256:$src1, VR128:$src2, imm:$src3))]>,
+ VEX_4V;
+def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i128mem:$src2, i8imm:$src3),
+ "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ [(set VR256:$dst,
+ (int_x86_avx2_vinserti128 VR256:$src1, (memopv2i64 addr:$src2),
+ imm:$src3))]>, VEX_4V;
+
+//===----------------------------------------------------------------------===//
+// VEXTRACTI128 - Extract packed integer values
+//
+def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
+ (ins VR256:$src1, i8imm:$src2),
+ "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>,
+ VEX;
+let neverHasSideEffects = 1, mayStore = 1 in
+def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR256:$src1, i8imm:$src2),
+ "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX;
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
index 7d67b998d8..4ab0884b5b 100644
--- a/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -886,3 +886,19 @@ define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) {
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly
+
+
+define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
+ ; CHECK: vextracti128
+ %res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vinserti128
+ %res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone