diff options
author | Michael Liao <michael.liao@intel.com> | 2012-10-29 17:57:12 +0000 |
---|---|---|
committer | Michael Liao <michael.liao@intel.com> | 2012-10-29 17:57:12 +0000 |
commit | 2a2263e744130ccc7f73c88021bca4cc037eb35e (patch) | |
tree | 351c6374803f6541c3c1cfc2d6987a2be7aedb18 | |
parent | 9cc1fad5ad645247080e22dec64128bec8054341 (diff) |
Fix PR14204
- Add the missing patterns for X86ISD::VZEXT from VR256 to VR256 when AVX2 is enabled.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166947 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 15 | ||||
-rw-r--r-- | test/CodeGen/X86/pr14204.ll | 15 |
2 files changed, 30 insertions, 0 deletions
```diff
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index e9c7f3e7f1..3fcc0dc414 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -5850,6 +5850,21 @@ let Predicates = [HasAVX2] in {
   def : Pat<(v4i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQYrr VR128:$src)>;
 
   def : Pat<(v4i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQYrr VR128:$src)>;
+
+  def : Pat<(v16i16 (X86vzext (v32i8 VR256:$src))),
+            (VPMOVZXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+  def : Pat<(v8i32 (X86vzext (v32i8 VR256:$src))),
+            (VPMOVZXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+  def : Pat<(v4i64 (X86vzext (v32i8 VR256:$src))),
+            (VPMOVZXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+  def : Pat<(v8i32 (X86vzext (v16i16 VR256:$src))),
+            (VPMOVZXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+  def : Pat<(v4i64 (X86vzext (v16i16 VR256:$src))),
+            (VPMOVZXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+  def : Pat<(v4i64 (X86vzext (v8i32 VR256:$src))),
+            (VPMOVZXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
 }
 
 let Predicates = [HasAVX] in {
diff --git a/test/CodeGen/X86/pr14204.ll b/test/CodeGen/X86/pr14204.ll
new file mode 100644
index 0000000000..42e362bf3b
--- /dev/null
+++ b/test/CodeGen/X86/pr14204.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-linux-pc -mcpu=core-avx2 | FileCheck %s
+
+; FIXME: vpmovsxwd should be generated instead of vpmovzxwd followed by
+; SLL/SRA.
+
+define <8 x i32> @foo(<8 x i1> %bar) nounwind readnone {
+entry:
+  %s = sext <8 x i1> %bar to <8 x i32>
+  ret <8 x i32> %s
+; CHECK: foo
+; CHECK: vpmovzxwd
+; CHECK: vpslld
+; CHECK: vpsrad
+; CHECK: ret
+}
```