From d6fb53adb19ccfbfb1eedec11c899aaa8401d036 Mon Sep 17 00:00:00 2001
From: Nadav Rotem
Date: Thu, 27 Dec 2012 08:15:45 +0000
Subject: On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an
 XMM-sized register. In most cases we actually compare or select YMM-sized
 registers and mixing the two types creates horrible code. This commit
 optimizes some of the transition sequences.

PR14657.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171148 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/X86/v8i1-masks.ll | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 test/CodeGen/X86/v8i1-masks.ll

diff --git a/test/CodeGen/X86/v8i1-masks.ll b/test/CodeGen/X86/v8i1-masks.ll
new file mode 100644
index 0000000000..01079997a3
--- /dev/null
+++ b/test/CodeGen/X86/v8i1-masks.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -o - < %s | FileCheck %s
+
+;CHECK: and_masks
+;CHECK: vmovups
+;CHECK-NEXT: vcmpltps
+;CHECK-NEXT: vandps
+;CHECK-NEXT: vmovups
+;CHECK: ret
+
+define void @and_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
+  %v0 = load <8 x float>* %a, align 16
+  %v1 = load <8 x float>* %b, align 16
+  %m0 = fcmp olt <8 x float> %v1, %v0
+  %v2 = load <8 x float>* %c, align 16
+  %m1 = fcmp olt <8 x float> %v2, %v0
+  %mand = and <8 x i1> %m1, %m0
+  %r = zext <8 x i1> %mand to <8 x i32>
+  store <8 x i32> %r, <8 x i32>* undef, align 16
+  ret void
+}
+
+;CHECK: neg_masks
+;CHECK: vmovups
+;CHECK-NEXT: vcmpltps
+;CHECK-NEXT: vandps
+;CHECK-NEXT: vmovups
+;CHECK: ret
+
+define void @neg_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
+  %v0 = load <8 x float>* %a, align 16
+  %v1 = load <8 x float>* %b, align 16
+  %m0 = fcmp olt <8 x float> %v1, %v0
+  %mand = xor <8 x i1> %m0, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
+  %r = zext <8 x i1> %mand to <8 x i32>
+  store <8 x i32> %r, <8 x i32>* undef, align 16
+  ret void
+}
+
--
cgit v1.2.3-18-g5258
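
A minimal sketch of the transition the commit message describes, assuming an
AVX target such as the corei7-avx CPU used in the test above; the function
name @mask_sext is illustrative and not part of the patch. On AVX, an
<8 x float> fcmp already produces an all-ones/all-zeros per-lane mask in a
YMM register, so sign-extending the <8 x i1> result to <8 x i32> can reuse
the compare output directly instead of legalizing the mask through v8i16 in
an XMM register and crossing back to YMM.

; Sketch only: compiled with llc -mtriple=x86_64-apple-darwin -mcpu=corei7-avx,
; the mask should lower to a single vcmpltps with no XMM round trip.
define <8 x i32> @mask_sext(<8 x float> %x, <8 x float> %y) nounwind {
  ; The compare leaves an all-ones or all-zeros mask per lane in a YMM register.
  %m = fcmp olt <8 x float> %x, %y
  ; The sext of the compare result folds away: each lane is already a sign mask.
  %s = sext <8 x i1> %m to <8 x i32>
  ret <8 x i32> %s
}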