aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakob Stoklund Olesen <stoklund@2pi.dk>2012-07-26 22:59:06 +0000
committerJakob Stoklund Olesen <stoklund@2pi.dk>2012-07-26 22:59:06 +0000
commit4db2dbf921d76cb0ea018a4813ad4c4032cdf15a (patch)
tree6cdfe7c8b593307ca4b6640daa718008142aebb9
parent1121c786fc24a4a8ce6c778cb9b5acdfa9006ffb (diff)
Eliminate sub_ss, sub_sd from broadcast patterns.
The (COPY_TO_REGCLASS GR32:$src, VR128) pattern looks odd, but copyPhysReg does the right thing with it. (The old pattern would eventually produce the same cross-class copy). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160830 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86InstrSSE.td58
1 files changed, 16 insertions, 42 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index c4d5ad0766..fb3953b2c8 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -7704,24 +7704,18 @@ let Predicates = [HasAVX2] in {
// is used by additional users, which prevents the pattern selection.
let AddedComplexity = 20 in {
def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
- (VBROADCASTSSrr
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
+ (VBROADCASTSSrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
- (VBROADCASTSSYrr
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
+ (VBROADCASTSSYrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
- (VBROADCASTSDYrr
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd))>;
+ (VBROADCASTSDYrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
- (VBROADCASTSSrr
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>;
+ (VBROADCASTSSrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
- (VBROADCASTSSYrr
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>;
+ (VBROADCASTSSYrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
- (VBROADCASTSDYrr
- (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd))>;
+ (VBROADCASTSDYrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
}
}
@@ -7745,46 +7739,26 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
let AddedComplexity = 20 in {
// 128bit broadcasts:
def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
- (VPSHUFDri
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), 0)>;
+ (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
(VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
- (VPSHUFDri
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), 0),
- sub_xmm),
- (VPSHUFDri
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss),
- 0), 1)>;
+ (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm),
+ (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>;
def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
(VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
- (VPSHUFDri
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd),
- 0x44),
- sub_xmm),
- (VPSHUFDri
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd),
- 0x44), 1)>;
+ (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm),
+ (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>;
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
- (VPSHUFDri
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss), 0)>;
+ (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>;
def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
(VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
- (VPSHUFDri
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss), 0),
- sub_xmm),
- (VPSHUFDri
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss),
- 0), 1)>;
+ (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), sub_xmm),
+ (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), 1)>;
def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
(VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
- (VPSHUFDri
- (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd),
- 0x44),
- sub_xmm),
- (VPSHUFDri
- (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd),
- 0x44), 1)>;
+ (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), sub_xmm),
+ (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>;
}
}