diff options
author | Nadav Rotem <nrotem@apple.com> | 2012-10-03 16:11:15 +0000 |
---|---|---|
committer | Nadav Rotem <nrotem@apple.com> | 2012-10-03 16:11:15 +0000 |
commit | c653de6c0f3722154a41cbb57c213a0cdd789419 (patch) | |
tree | 9913b3a243254041e8ad9c7378737779a9101d97 /test/CodeGen/X86/MergeConsecutiveStores.ll | |
parent | bfcb4aa10b5948539f6ee59eecfe88faa9fc4e94 (diff) |
A DAGCombine optimization for merging consecutive stores to memory. The optimization
is not profitable in many cases because modern processors perform multiple stores
in parallel and combining the stored values prior to the merged store requires extra work. We handle two main cases:
1. Store of multiple consecutive constants:
q->a = 3;
q->b = 5;
In this case we store a single legal wide integer.
2. Store of multiple consecutive loads:
int a = p->a;
int b = p->b;
q->a = a;
q->b = b;
In this case we load/store either legal vector registers or legal wide integer registers.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@165125 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/X86/MergeConsecutiveStores.ll')
-rw-r--r-- | test/CodeGen/X86/MergeConsecutiveStores.ll | 273 |
1 files changed, 273 insertions, 0 deletions
diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll new file mode 100644 index 0000000000..79f8ee54a2 --- /dev/null +++ b/test/CodeGen/X86/MergeConsecutiveStores.ll @@ -0,0 +1,273 @@ +; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +%struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 } +%struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 } + +; Merge all eight constant byte stores into a single 64-bit integer store. +; CHECK: merge_const_store +; save 1,2,3 ... 8 as one big integer (0x0807060504030201, little-endian byte order). +; CHECK: movabsq $578437695752307201 +; CHECK: ret +define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp { + %1 = icmp sgt i32 %count, 0 + br i1 %1, label %.lr.ph, label %._crit_edge +.lr.ph: + %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ] + %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ] + %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0 + store i8 1, i8* %2, align 1 + %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1 + store i8 2, i8* %3, align 1 + %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2 + store i8 3, i8* %4, align 1 + %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3 + store i8 4, i8* %5, align 1 + %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4 + store i8 5, i8* %6, align 1 + %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5 + store i8 6, i8* %7, align 1 + %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6 + store i8 7, i8* %8, align 1 + %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7 + store i8 8, i8* %9, align 1 + %10 = add nsw i32 %i.02, 1 + %11 = getelementptr inbounds %struct.A* %.01, i64 1 + %exitcond = icmp eq i32 %10, %count + br i1 %exitcond, label %._crit_edge, label %.lr.ph +._crit_edge: + ret 
void +} + +; Merge the first 4 constant byte stores into a single 32-bit integer store (0x04030201). Store the rest as scalar bytes. +; CHECK: merge_nonconst_store +; CHECK: movl $67305985 +; CHECK: movb +; CHECK: movb +; CHECK: movb +; CHECK: movb +; CHECK: ret +define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp { + %1 = icmp sgt i32 %count, 0 + br i1 %1, label %.lr.ph, label %._crit_edge +.lr.ph: + %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ] + %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ] + %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0 + store i8 1, i8* %2, align 1 + %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1 + store i8 2, i8* %3, align 1 + %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2 + store i8 3, i8* %4, align 1 + %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3 + store i8 4, i8* %5, align 1 + %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4 + store i8 %zz, i8* %6, align 1 ; <----------- Not a const; + %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5 + store i8 6, i8* %7, align 1 + %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6 + store i8 7, i8* %8, align 1 + %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7 + store i8 8, i8* %9, align 1 + %10 = add nsw i32 %i.02, 1 + %11 = getelementptr inbounds %struct.A* %.01, i64 1 + %exitcond = icmp eq i32 %10, %count + br i1 %exitcond, label %._crit_edge, label %.lr.ph +._crit_edge: + ret void +} + + +;CHECK: merge_loads_i16 +; load: +;CHECK: movw +; store: +;CHECK: movw +;CHECK: ret +define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp { + %1 = icmp sgt i32 %count, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0 + %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0 + %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1 + br label %4 + +; <label>:4 ; preds = %4, %.lr.ph + %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, 
%4 ] + %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ] + %5 = load i8* %2, align 1 + %6 = load i8* %3, align 1 + %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0 + store i8 %5, i8* %7, align 1 + %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1 + store i8 %6, i8* %8, align 1 + %9 = add nsw i32 %i.02, 1 + %10 = getelementptr inbounds %struct.A* %.01, i64 1 + %exitcond = icmp eq i32 %9, %count + br i1 %exitcond, label %._crit_edge, label %4 + +._crit_edge: ; preds = %4, %0 + ret void +} + +; The loads and the stores are interleaved. Can't merge them. +;CHECK: no_merge_loads +;CHECK: movb +;CHECK: movb +;CHECK: movb +;CHECK: movb +;CHECK: ret +define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp { + %1 = icmp sgt i32 %count, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0 + %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0 + %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1 + br label %a4 + +a4: ; preds = %a4, %.lr.ph + %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ] + %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ] + %a5 = load i8* %2, align 1 + %a7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0 + store i8 %a5, i8* %a7, align 1 + %a8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1 + %a6 = load i8* %3, align 1 + store i8 %a6, i8* %a8, align 1 + %a9 = add nsw i32 %i.02, 1 + %a10 = getelementptr inbounds %struct.A* %.01, i64 1 + %exitcond = icmp eq i32 %a9, %count + br i1 %exitcond, label %._crit_edge, label %a4 + +._crit_edge: ; preds = %a4, %0 + ret void +} + + +;CHECK: merge_loads_integer +; load: +;CHECK: movq +; store: +;CHECK: movq +;CHECK: ret +define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp { + %1 = icmp sgt i32 %count, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0 + %2 = getelementptr inbounds 
%struct.B* %q, i64 0, i32 0 + %3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1 + br label %4 + +; <label>:4 ; preds = %4, %.lr.ph + %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ] + %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %10, %4 ] + %5 = load i32* %2 + %6 = load i32* %3 + %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0 + store i32 %5, i32* %7 + %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1 + store i32 %6, i32* %8 + %9 = add nsw i32 %i.02, 1 + %10 = getelementptr inbounds %struct.B* %.01, i64 1 + %exitcond = icmp eq i32 %9, %count + br i1 %exitcond, label %._crit_edge, label %4 + +._crit_edge: ; preds = %4, %0 + ret void +} + + +;CHECK: merge_loads_vector +; load: +;CHECK: movups +; store: +;CHECK: movups +;CHECK: ret +define void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp { + %a1 = icmp sgt i32 %count, 0 + br i1 %a1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0 + %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0 + %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1 + %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2 + %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3 + br label %block4 + +block4: ; preds = %block4, %.lr.ph + %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ] + %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ] + %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0 + %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1 + %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2 + %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3 + %b1 = load i32* %a2 + %b2 = load i32* %a3 + %b3 = load i32* %a4 + %b4 = load i32* %a5 + store i32 %b1, i32* %a7 + store i32 %b2, i32* %a8 + store i32 %b3, i32* %a9 + store i32 %b4, i32* %a10 + %c9 = add nsw i32 %i.02, 1 + %c10 = getelementptr inbounds %struct.B* %.01, i64 1 + %exitcond = icmp eq i32 %c9, %count + br i1 %exitcond, label %._crit_edge, 
label %block4 + +._crit_edge: ; preds = %block4, %0 + ret void +} + + +;CHECK: merge_loads_no_align +; load: four unmerged movl expected (presumably because the accesses are only align 1) +;CHECK: movl +;CHECK: movl +;CHECK: movl +;CHECK: movl +; store: four unmerged movl expected as well +;CHECK: movl +;CHECK: movl +;CHECK: movl +;CHECK: movl +;CHECK: ret +define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp { + %a1 = icmp sgt i32 %count, 0 + br i1 %a1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0 + %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0 + %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1 + %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2 + %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3 + br label %block4 + +block4: ; preds = %block4, %.lr.ph + %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ] + %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ] + %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0 + %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1 + %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2 + %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3 + %b1 = load i32* %a2, align 1 + %b2 = load i32* %a3, align 1 + %b3 = load i32* %a4, align 1 + %b4 = load i32* %a5, align 1 + store i32 %b1, i32* %a7, align 1 + store i32 %b2, i32* %a8, align 1 + store i32 %b3, i32* %a9, align 1 + store i32 %b4, i32* %a10, align 1 + %c9 = add nsw i32 %i.02, 1 + %c10 = getelementptr inbounds %struct.B* %.01, i64 1 + %exitcond = icmp eq i32 %c9, %count + br i1 %exitcond, label %._crit_edge, label %block4 + +._crit_edge: ; preds = %block4, %0 + ret void +} + |