aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/X86/X86InstrSSE.td
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/X86/X86InstrSSE.td')
-rw-r--r--lib/Target/X86/X86InstrSSE.td170
1 files changed, 166 insertions, 4 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 99542c189b..412b7ac9d3 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3884,10 +3884,64 @@ let Predicates = [HasAVX] in {
(VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
// Shift up / down and insert zero's.
- def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
+ def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
(VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
- def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
+ def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
(VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
+
+ def : Pat<(v8i16 (X86vshli VR128:$src1, (i32 imm:$src2))),
+ (VPSLLWri VR128:$src1, imm:$src2)>;
+ def : Pat<(v4i32 (X86vshli VR128:$src1, (i32 imm:$src2))),
+ (VPSLLDri VR128:$src1, imm:$src2)>;
+ def : Pat<(v2i64 (X86vshli VR128:$src1, (i32 imm:$src2))),
+ (VPSLLQri VR128:$src1, imm:$src2)>;
+
+ def : Pat<(v8i16 (X86vsrli VR128:$src1, (i32 imm:$src2))),
+ (VPSRLWri VR128:$src1, imm:$src2)>;
+ def : Pat<(v4i32 (X86vsrli VR128:$src1, (i32 imm:$src2))),
+ (VPSRLDri VR128:$src1, imm:$src2)>;
+ def : Pat<(v2i64 (X86vsrli VR128:$src1, (i32 imm:$src2))),
+ (VPSRLQri VR128:$src1, imm:$src2)>;
+
+ def : Pat<(v8i16 (X86vsrai VR128:$src1, (i32 imm:$src2))),
+ (VPSRAWri VR128:$src1, imm:$src2)>;
+ def : Pat<(v4i32 (X86vsrai VR128:$src1, (i32 imm:$src2))),
+ (VPSRADri VR128:$src1, imm:$src2)>;
+
+ def : Pat<(v8i16 (X86vshl VR128:$src1, (v8i16 VR128:$src2))),
+ (VPSLLWrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86vshl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
+ (VPSLLWrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86vshl VR128:$src1, (v4i32 VR128:$src2))),
+ (VPSLLDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86vshl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (VPSLLDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (X86vshl VR128:$src1, (v2i64 VR128:$src2))),
+ (VPSLLQrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (X86vshl VR128:$src1, (memopv2i64 addr:$src2))),
+ (VPSLLQrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(v8i16 (X86vsrl VR128:$src1, (v8i16 VR128:$src2))),
+ (VPSRLWrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86vsrl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
+ (VPSRLWrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86vsrl VR128:$src1, (v4i32 VR128:$src2))),
+ (VPSRLDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86vsrl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (VPSRLDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (X86vsrl VR128:$src1, (v2i64 VR128:$src2))),
+ (VPSRLQrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (X86vsrl VR128:$src1, (memopv2i64 addr:$src2))),
+ (VPSRLQrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(v8i16 (X86vsra VR128:$src1, (v8i16 VR128:$src2))),
+ (VPSRAWrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86vsra VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
+ (VPSRAWrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86vsra VR128:$src1, (v4i32 VR128:$src2))),
+ (VPSRADrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86vsra VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (VPSRADrm VR128:$src1, addr:$src2)>;
}
let Predicates = [HasAVX2] in {
@@ -3895,6 +3949,60 @@ let Predicates = [HasAVX2] in {
(VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2),
(VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
+
+ def : Pat<(v16i16 (X86vshli VR256:$src1, (i32 imm:$src2))),
+ (VPSLLWYri VR256:$src1, imm:$src2)>;
+ def : Pat<(v8i32 (X86vshli VR256:$src1, (i32 imm:$src2))),
+ (VPSLLDYri VR256:$src1, imm:$src2)>;
+ def : Pat<(v4i64 (X86vshli VR256:$src1, (i32 imm:$src2))),
+ (VPSLLQYri VR256:$src1, imm:$src2)>;
+
+ def : Pat<(v16i16 (X86vsrli VR256:$src1, (i32 imm:$src2))),
+ (VPSRLWYri VR256:$src1, imm:$src2)>;
+ def : Pat<(v8i32 (X86vsrli VR256:$src1, (i32 imm:$src2))),
+ (VPSRLDYri VR256:$src1, imm:$src2)>;
+ def : Pat<(v4i64 (X86vsrli VR256:$src1, (i32 imm:$src2))),
+ (VPSRLQYri VR256:$src1, imm:$src2)>;
+
+ def : Pat<(v16i16 (X86vsrai VR256:$src1, (i32 imm:$src2))),
+ (VPSRAWYri VR256:$src1, imm:$src2)>;
+ def : Pat<(v8i32 (X86vsrai VR256:$src1, (i32 imm:$src2))),
+ (VPSRADYri VR256:$src1, imm:$src2)>;
+
+ def : Pat<(v16i16 (X86vshl VR256:$src1, (v8i16 VR128:$src2))),
+ (VPSLLWYrr VR256:$src1, VR128:$src2)>;
+ def : Pat<(v16i16 (X86vshl VR256:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
+ (VPSLLWYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86vshl VR256:$src1, (v4i32 VR128:$src2))),
+ (VPSLLDYrr VR256:$src1, VR128:$src2)>;
+ def : Pat<(v8i32 (X86vshl VR256:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (VPSLLDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86vshl VR256:$src1, (v2i64 VR128:$src2))),
+ (VPSLLQYrr VR256:$src1, VR128:$src2)>;
+ def : Pat<(v4i64 (X86vshl VR256:$src1, (memopv2i64 addr:$src2))),
+ (VPSLLQYrm VR256:$src1, addr:$src2)>;
+
+ def : Pat<(v16i16 (X86vsrl VR256:$src1, (v8i16 VR128:$src2))),
+ (VPSRLWYrr VR256:$src1, VR128:$src2)>;
+ def : Pat<(v16i16 (X86vsrl VR256:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
+ (VPSRLWYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86vsrl VR256:$src1, (v4i32 VR128:$src2))),
+ (VPSRLDYrr VR256:$src1, VR128:$src2)>;
+ def : Pat<(v8i32 (X86vsrl VR256:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (VPSRLDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86vsrl VR256:$src1, (v2i64 VR128:$src2))),
+ (VPSRLQYrr VR256:$src1, VR128:$src2)>;
+ def : Pat<(v4i64 (X86vsrl VR256:$src1, (memopv2i64 addr:$src2))),
+ (VPSRLQYrm VR256:$src1, addr:$src2)>;
+
+ def : Pat<(v16i16 (X86vsra VR256:$src1, (v8i16 VR128:$src2))),
+ (VPSRAWYrr VR256:$src1, VR128:$src2)>;
+ def : Pat<(v16i16 (X86vsra VR256:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
+ (VPSRAWYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86vsra VR256:$src1, (v4i32 VR128:$src2))),
+ (VPSRADYrr VR256:$src1, VR128:$src2)>;
+ def : Pat<(v8i32 (X86vsra VR256:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (VPSRADYrm VR256:$src1, addr:$src2)>;
}
let Predicates = [HasSSE2] in {
@@ -3906,10 +4014,64 @@ let Predicates = [HasSSE2] in {
(PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
// Shift up / down and insert zero's.
- def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
+ def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
(PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
- def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
+ def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
(PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
+
+ def : Pat<(v8i16 (X86vshli VR128:$src1, (i32 imm:$src2))),
+ (PSLLWri VR128:$src1, imm:$src2)>;
+ def : Pat<(v4i32 (X86vshli VR128:$src1, (i32 imm:$src2))),
+ (PSLLDri VR128:$src1, imm:$src2)>;
+ def : Pat<(v2i64 (X86vshli VR128:$src1, (i32 imm:$src2))),
+ (PSLLQri VR128:$src1, imm:$src2)>;
+
+ def : Pat<(v8i16 (X86vsrli VR128:$src1, (i32 imm:$src2))),
+ (PSRLWri VR128:$src1, imm:$src2)>;
+ def : Pat<(v4i32 (X86vsrli VR128:$src1, (i32 imm:$src2))),
+ (PSRLDri VR128:$src1, imm:$src2)>;
+ def : Pat<(v2i64 (X86vsrli VR128:$src1, (i32 imm:$src2))),
+ (PSRLQri VR128:$src1, imm:$src2)>;
+
+ def : Pat<(v8i16 (X86vsrai VR128:$src1, (i32 imm:$src2))),
+ (PSRAWri VR128:$src1, imm:$src2)>;
+ def : Pat<(v4i32 (X86vsrai VR128:$src1, (i32 imm:$src2))),
+ (PSRADri VR128:$src1, imm:$src2)>;
+
+ def : Pat<(v8i16 (X86vshl VR128:$src1, (v8i16 VR128:$src2))),
+ (PSLLWrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86vshl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
+ (PSLLWrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86vshl VR128:$src1, (v4i32 VR128:$src2))),
+ (PSLLDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86vshl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (PSLLDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (X86vshl VR128:$src1, (v2i64 VR128:$src2))),
+ (PSLLQrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (X86vshl VR128:$src1, (memopv2i64 addr:$src2))),
+ (PSLLQrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(v8i16 (X86vsrl VR128:$src1, (v8i16 VR128:$src2))),
+ (PSRLWrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86vsrl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
+ (PSRLWrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86vsrl VR128:$src1, (v4i32 VR128:$src2))),
+ (PSRLDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86vsrl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (PSRLDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (X86vsrl VR128:$src1, (v2i64 VR128:$src2))),
+ (PSRLQrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (X86vsrl VR128:$src1, (memopv2i64 addr:$src2))),
+ (PSRLQrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(v8i16 (X86vsra VR128:$src1, (v8i16 VR128:$src2))),
+ (PSRAWrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86vsra VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
+ (PSRAWrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86vsra VR128:$src1, (v4i32 VR128:$src2))),
+ (PSRADrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86vsra VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (PSRADrm VR128:$src1, addr:$src2)>;
}
//===---------------------------------------------------------------------===//