diff options
author | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-07-21 02:24:08 +0000 |
---|---|---|
committer | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-07-21 02:24:08 +0000 |
commit | dbd4fe2b0ada8014c2c8e042651de5799a1d4c5d (patch) | |
tree | ba4e4c406bec4a597cbd92021c87b43ff184353c | |
parent | 65b74e1d00eef81b596b4c207fba069aa1eb8214 (diff) |
- Register v16i16 as valid VR256 register class
- Add more bitcasts for v16i16
- Since 135661 and 135662 already added the splat logic,
just add one more splat test for v16i16
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@135663 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 11 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 30 | ||||
-rw-r--r-- | test/CodeGen/X86/avx-256-splat.ll | 13 |
3 files changed, 34 insertions, 20 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1c827f0dd3..6ce531a960 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -970,11 +970,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSETCC, MVT::v2i64, Custom); if (!UseSoftFloat && Subtarget->hasAVX()) { - addRegisterClass(MVT::v8f32, X86::VR256RegisterClass); - addRegisterClass(MVT::v4f64, X86::VR256RegisterClass); - addRegisterClass(MVT::v8i32, X86::VR256RegisterClass); - addRegisterClass(MVT::v4i64, X86::VR256RegisterClass); - addRegisterClass(MVT::v32i8, X86::VR256RegisterClass); + addRegisterClass(MVT::v32i8, X86::VR256RegisterClass); + addRegisterClass(MVT::v16i16, X86::VR256RegisterClass); + addRegisterClass(MVT::v8i32, X86::VR256RegisterClass); + addRegisterClass(MVT::v8f32, X86::VR256RegisterClass); + addRegisterClass(MVT::v4i64, X86::VR256RegisterClass); + addRegisterClass(MVT::v4f64, X86::VR256RegisterClass); setOperationAction(ISD::LOAD, MVT::v8f32, Legal); setOperationAction(ISD::LOAD, MVT::v4f64, Legal); diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 73e465fe29..47aa7da68c 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3668,20 +3668,22 @@ let Predicates = [HasXMMInt] in { } let Predicates = [HasAVX] in { - def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>; - def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>; - def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>; - def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>; - def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>; - def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>; - def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>; - def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>; - def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>; - def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>; - def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>; - def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>; - def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>; - def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>; + def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>; + def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))), (v16i16 VR256:$src)>; } // Move scalar to XMM zero-extended diff --git a/test/CodeGen/X86/avx-256-splat.ll b/test/CodeGen/X86/avx-256-splat.ll index 27ff9268b4..39230fe4fb 100644 --- a/test/CodeGen/X86/avx-256-splat.ll +++ b/test/CodeGen/X86/avx-256-splat.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s -; FIXME: use avx versions for punpcklbw and punpckhbw +; FIXME: use avx versions for punpcklbw, punpckhbw and punpckhwd ; CHECK: vextractf128 $0 ; CHECK-NEXT: punpcklbw @@ -14,3 +14,14 @@ entry: ret <32 x i8> %shuffle } +; CHECK: vextractf128 $0 +; CHECK-NEXT: punpckhwd +; CHECK-NEXT: vinsertf128 $0 +; CHECK-NEXT: vinsertf128 $1 +; CHECK-NEXT: vpermilps $85 +define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> + ret <16 x i16> %shuffle +} + |