diff options
author | Nadav Rotem <nrotem@apple.com> | 2013-01-18 23:10:30 +0000 |
---|---|---|
committer | Nadav Rotem <nrotem@apple.com> | 2013-01-18 23:10:30 +0000 |
commit | 48177ac90fb940833b9deea1a6716092348cfe82 (patch) | |
tree | 5252f0617e256f0dd1f8b26082f05a088d7232b9 /test/CodeGen/X86 | |
parent | 7336f7febb5170b374a4cbffee273ad82ff8a1a3 (diff) |
On Sandy Bridge, loading an unaligned 256-bit value using two XMM loads (vmovups and vinsertf128) is faster than using a single vmovups instruction.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172868 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/X86')
-rw-r--r-- | test/CodeGen/X86/sandybridge-loads.ll | 21 | ||||
-rw-r--r-- | test/CodeGen/X86/v8i1-masks.ll | 2 |
2 files changed, 22 insertions, 1 deletions
diff --git a/test/CodeGen/X86/sandybridge-loads.ll b/test/CodeGen/X86/sandybridge-loads.ll
new file mode 100644
index 0000000000..d85c32eaa7
--- /dev/null
+++ b/test/CodeGen/X86/sandybridge-loads.ll
@@ -0,0 +1,21 @@
+; RUN: llc -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -o - < %s | FileCheck %s
+
+;CHECK: wideloads
+;CHECK: vmovaps
+;CHECK: vinsertf128
+;CHECK: vmovups
+;CHECK-NOT: vinsertf128
+;CHECK: ret
+
+define void @wideloads(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
+  %v0 = load <8 x float>* %a, align 16 ; <---- unaligned!
+  %v1 = load <8 x float>* %b, align 32 ; <---- aligned!
+  %m0 = fcmp olt <8 x float> %v1, %v0
+  %v2 = load <8 x float>* %c, align 16
+  %m1 = fcmp olt <8 x float> %v2, %v0
+  %mand = and <8 x i1> %m1, %m0
+  %r = zext <8 x i1> %mand to <8 x i32>
+  store <8 x i32> %r, <8 x i32>* undef, align 16
+  ret void
+}
+
diff --git a/test/CodeGen/X86/v8i1-masks.ll b/test/CodeGen/X86/v8i1-masks.ll
index abb4b39bd6..ea231aff5b 100644
--- a/test/CodeGen/X86/v8i1-masks.ll
+++ b/test/CodeGen/X86/v8i1-masks.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -o - < %s | FileCheck %s
 
 ;CHECK: and_masks
-;CHECK: vmovups
+;CHECK: vmovaps
 ;CHECK: vcmpltp
 ;CHECK: vcmpltp
 ;CHECK: vandps