| author | Benjamin Kramer <benny.kra@googlemail.com> | 2012-12-22 11:34:28 +0000 |
|---|---|---|
| committer | Benjamin Kramer <benny.kra@googlemail.com> | 2012-12-22 11:34:28 +0000 |
| commit | 17347912b46213658074416133396caffd034e0c (patch) | |
| tree | 7ffd4581245e74f4d8922c2a459d7231c3a3808d /test/CodeGen/X86/avx-sext.ll | |
| parent | b44c1f90e465a1905cff00212929520ab1f36b64 (diff) | |
X86: Emit vector sext as shuffle + sra if vpmovsx is not available.
Also loosen the SSSE3 dependency a bit; expanded pshufb + psra is still better
than scalarized loads. Fixes PR14590.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170984 91177308-0d34-0410-b5e6-96231b3b80d8
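
The shuffle + sra recipe described above can be written down as a minimal C-intrinsics sketch of the instruction pattern the new SSE2 checks below expect for `load_sext_test1` (punpcklwd + psrad $16). The function name is hypothetical and this is an illustration of the emitted sequence, not the backend's lowering code:

```c
#include <emmintrin.h> /* SSE2 */

/* Illustrative sketch: sign-extend the low four i16 lanes of v to i32
   without SSE4.1/AVX vpmovsxwd. Interleaving v with itself places each
   16-bit element in the high half of a 32-bit lane; the arithmetic
   right shift by 16 then fills the low half with copies of the sign
   bit, yielding the sign-extended value. */
static __m128i sext_4i16_to_4i32(__m128i v) {
    __m128i hi = _mm_unpacklo_epi16(v, v); /* punpcklwd */
    return _mm_srai_epi32(hi, 16);         /* psrad $16 */
}
```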
Diffstat (limited to 'test/CodeGen/X86/avx-sext.ll')
| -rwxr-xr-x | test/CodeGen/X86/avx-sext.ll | 119 |

1 file changed, 96 insertions, 23 deletions
diff --git a/test/CodeGen/X86/avx-sext.ll b/test/CodeGen/X86/avx-sext.ll
index 425d09ca04..8d7d79db7d 100755
--- a/test/CodeGen/X86/avx-sext.ll
+++ b/test/CodeGen/X86/avx-sext.ll
@@ -1,69 +1,142 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s -check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=pentium4 | FileCheck %s -check-prefix=SSE2

 define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
-;CHECK: sext_8i16_to_8i32
-;CHECK: vpmovsxwd
+; AVX: sext_8i16_to_8i32
+; AVX: vpmovsxwd

   %B = sext <8 x i16> %A to <8 x i32>
   ret <8 x i32>%B
 }

 define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
-;CHECK: sext_4i32_to_4i64
-;CHECK: vpmovsxdq
+; AVX: sext_4i32_to_4i64
+; AVX: vpmovsxdq

   %B = sext <4 x i32> %A to <4 x i64>
   ret <4 x i64>%B
 }

-; CHECK: load_sext_test1
-; CHECK: vpmovsxwd (%r{{[^,]*}}), %xmm{{.*}}
-; CHECK: ret
+; AVX: load_sext_test1
+; AVX: vpmovsxwd (%r{{[^,]*}}), %xmm{{.*}}
+; AVX: ret
+
+; SSSE3: load_sext_test1
+; SSSE3: movq
+; SSSE3: punpcklwd %xmm{{.*}}, %xmm{{.*}}
+; SSSE3: psrad $16
+; SSSE3: ret
+
+; SSE2: load_sext_test1
+; SSE2: movq
+; SSE2: punpcklwd %xmm{{.*}}, %xmm{{.*}}
+; SSE2: psrad $16
+; SSE2: ret
 define <4 x i32> @load_sext_test1(<4 x i16> *%ptr) {
  %X = load <4 x i16>* %ptr
  %Y = sext <4 x i16> %X to <4 x i32>
  ret <4 x i32>%Y
 }

-; CHECK: load_sext_test2
-; CHECK: vpmovsxbd (%r{{[^,]*}}), %xmm{{.*}}
-; CHECK: ret
+; AVX: load_sext_test2
+; AVX: vpmovsxbd (%r{{[^,]*}}), %xmm{{.*}}
+; AVX: ret
+
+; SSSE3: load_sext_test2
+; SSSE3: movd
+; SSSE3: pshufb
+; SSSE3: psrad $24
+; SSSE3: ret
+
+; SSE2: load_sext_test2
+; SSE2: movl
+; SSE2: psrad $24
+; SSE2: ret
 define <4 x i32> @load_sext_test2(<4 x i8> *%ptr) {
  %X = load <4 x i8>* %ptr
  %Y = sext <4 x i8> %X to <4 x i32>
  ret <4 x i32>%Y
 }

-; CHECK: load_sext_test3
-; CHECK: vpmovsxbq (%r{{[^,]*}}), %xmm{{.*}}
-; CHECK: ret
+; AVX: load_sext_test3
+; AVX: vpmovsxbq (%r{{[^,]*}}), %xmm{{.*}}
+; AVX: ret
+
+; SSSE3: load_sext_test3
+; SSSE3: movsbq
+; SSSE3: movsbq
+; SSSE3: punpcklqdq
+; SSSE3: ret
+
+; SSE2: load_sext_test3
+; SSE2: movsbq
+; SSE2: movsbq
+; SSE2: punpcklqdq
+; SSE2: ret
 define <2 x i64> @load_sext_test3(<2 x i8> *%ptr) {
  %X = load <2 x i8>* %ptr
  %Y = sext <2 x i8> %X to <2 x i64>
  ret <2 x i64>%Y
 }

-; CHECK: load_sext_test4
-; CHECK: vpmovsxwq (%r{{[^,]*}}), %xmm{{.*}}
-; CHECK: ret
+; AVX: load_sext_test4
+; AVX: vpmovsxwq (%r{{[^,]*}}), %xmm{{.*}}
+; AVX: ret
+
+; SSSE3: load_sext_test4
+; SSSE3: movswq
+; SSSE3: movswq
+; SSSE3: punpcklqdq
+; SSSE3: ret
+
+; SSE2: load_sext_test4
+; SSE2: movswq
+; SSE2: movswq
+; SSE2: punpcklqdq
+; SSE2: ret
 define <2 x i64> @load_sext_test4(<2 x i16> *%ptr) {
  %X = load <2 x i16>* %ptr
  %Y = sext <2 x i16> %X to <2 x i64>
  ret <2 x i64>%Y
 }

-; CHECK: load_sext_test5
-; CHECK: vpmovsxdq (%r{{[^,]*}}), %xmm{{.*}}
-; CHECK: ret
+; AVX: load_sext_test5
+; AVX: vpmovsxdq (%r{{[^,]*}}), %xmm{{.*}}
+; AVX: ret
+
+; SSSE3: load_sext_test5
+; SSSE3: movslq
+; SSSE3: movslq
+; SSSE3: punpcklqdq
+; SSSE3: ret
+
+; SSE2: load_sext_test5
+; SSE2: movslq
+; SSE2: movslq
+; SSE2: punpcklqdq
+; SSE2: ret
 define <2 x i64> @load_sext_test5(<2 x i32> *%ptr) {
  %X = load <2 x i32>* %ptr
  %Y = sext <2 x i32> %X to <2 x i64>
  ret <2 x i64>%Y
 }

-; CHECK: load_sext_test6
-; CHECK: vpmovsxbw (%r{{[^,]*}}), %xmm{{.*}}
-; CHECK: ret
+; AVX: load_sext_test6
+; AVX: vpmovsxbw (%r{{[^,]*}}), %xmm{{.*}}
+; AVX: ret
+
+; SSSE3: load_sext_test6
+; SSSE3: movq
+; SSSE3: punpcklbw
+; SSSE3: psraw $8
+; SSSE3: ret
+
+; SSE2: load_sext_test6
+; SSE2: movq
+; SSE2: punpcklbw
+; SSE2: psraw $8
+; SSE2: ret
 define <8 x i16> @load_sext_test6(<8 x i8> *%ptr) {
  %X = load <8 x i8>* %ptr
  %Y = sext <8 x i8> %X to <8 x i16>
  ret <8 x i16>%Y
 }
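
The SSSE3 expectations for load_sext_test2 correspond to the expanded pshufb + psra form mentioned in the commit message. A comparable C-intrinsics sketch, with the same caveats (the shuffle-control constant and function name are assumptions for illustration, not lifted from the backend):

```c
#include <tmmintrin.h> /* SSSE3 */

/* Illustrative sketch: sign-extend four i8 values (in the low bytes
   of v) to i32. pshufb moves byte i into the top byte of 32-bit lane
   i and zeroes the remaining bytes (control bytes with the high bit
   set produce 0); psrad $24 then shifts each value down while smearing
   its sign bit across the upper 24 bits of the lane. */
static __m128i sext_4i8_to_4i32(__m128i v) {
    const __m128i ctl = _mm_set_epi8(3, -1, -1, -1, 2, -1, -1, -1,
                                     1, -1, -1, -1, 0, -1, -1, -1);
    __m128i hi = _mm_shuffle_epi8(v, ctl); /* pshufb */
    return _mm_srai_epi32(hi, 24);         /* psrad $24 */
}
```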