diff options
author | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2012-09-17 23:03:25 +0000 |
---|---|---|
committer | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2012-09-17 23:03:25 +0000 |
commit | 87f7864c6d81ae134335b8271ac12c937c81dffc (patch) | |
tree | 775f7cdefe682b919193144641cebe372ce0326c /test | |
parent | 98279e8d65fe5c86d0370b3e2a62f244985bec33 (diff) |
Merge into undefined lanes under -new-coalescer.
Add LIS::pruneValue() and extendToIndices(). These two functions are
used by the register coalescer when merging two live ranges requires
more than a trivial value mapping as supported by LiveInterval::join().
The pruneValue() function can remove the part of a value number that is
going to conflict in join(). Afterwards, extendToIndices() can restore the
live range, using any new dominating value numbers and updating the SSA
form.
Use this complex value mapping to support merging a register into a
vector lane that has a conflicting value, provided that the clobbered
lane is undef.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@164074 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/ARM/coalesce-subregs.ll | 50 |
1 file changed, 49 insertions, 1 deletion
diff --git a/test/CodeGen/ARM/coalesce-subregs.ll b/test/CodeGen/ARM/coalesce-subregs.ll index fb0f4c67c9..dfb5b17306 100644 --- a/test/CodeGen/ARM/coalesce-subregs.ll +++ b/test/CodeGen/ARM/coalesce-subregs.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mcpu=cortex-a9 | FileCheck %s +; RUN: llc < %s -mcpu=cortex-a9 -new-coalescer | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" target triple = "thumbv7-apple-ios0.0.0" @@ -66,3 +66,51 @@ do.end: ; preds = %do.body declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8*, i32) nounwind readonly declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind + +; CHECK: f3 +; This function has lane insertions that span basic blocks. +; The trivial REG_SEQUENCE lowering can't handle that, but the coalescer can. +; +; void f3(float *p, float *q) { +; float32x2_t x; +; x[1] = p[3]; +; if (q) +; x[0] = q[0] + q[1]; +; else +; x[0] = p[2]; +; vst1_f32(p+4, x); +; } +; +; CHECK-NOT: vmov +; CHECK-NOT: vorr +define void @f3(float* %p, float* %q) nounwind ssp { +entry: + %arrayidx = getelementptr inbounds float* %p, i32 3 + %0 = load float* %arrayidx, align 4 + %vecins = insertelement <2 x float> undef, float %0, i32 1 + %tobool = icmp eq float* %q, null + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + %1 = load float* %q, align 4 + %arrayidx2 = getelementptr inbounds float* %q, i32 1 + %2 = load float* %arrayidx2, align 4 + %add = fadd float %1, %2 + %vecins3 = insertelement <2 x float> %vecins, float %add, i32 0 + br label %if.end + +if.else: ; preds = %entry + %arrayidx4 = getelementptr inbounds float* %p, i32 2 + %3 = load float* %arrayidx4, align 4 + %vecins5 = insertelement <2 x float> %vecins, float %3, i32 0 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %x.0 = phi <2 x float> [ %vecins3, %if.then ], [ %vecins5, %if.else ] + %add.ptr = getelementptr 
inbounds float* %p, i32 4 + %4 = bitcast float* %add.ptr to i8* + tail call void @llvm.arm.neon.vst1.v2f32(i8* %4, <2 x float> %x.0, i32 4) + ret void +} + +declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) nounwind |