Merge into undefined lanes under -new-coalescer.

Add LIS::pruneValue() and extendToIndices(). These two functions are used by the register coalescer when merging two live ranges requires more than a trivial value mapping as supported by LiveInterval::join(). The pruneValue() function can remove the part of a value number that is going to conflict in join(). Afterwards, extendToIndices can restore the live range, using any new dominating value numbers and updating the SSA form. Use this complex value mapping to support merging a register into a vector lane that has a conflicting value, but the clobbered lane is undef. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@164074 91177308-0d34-0410-b5e6-96231b3b80d8
author: Jakob Stoklund Olesen <stoklund@2pi.dk> 2012-09-17 23:03:25 +0000
committer: Jakob Stoklund Olesen <stoklund@2pi.dk> 2012-09-17 23:03:25 +0000
commit: 87f7864c6d81ae134335b8271ac12c937c81dffc (patch)
tree: 775f7cdefe682b919193144641cebe372ce0326c /test
parent: 98279e8d65fe5c86d0370b3e2a62f244985bec33 (diff)
1 files changed, 49 insertions, 1 deletions
diff --git a/test/CodeGen/ARM/coalesce-subregs.ll b/test/CodeGen/ARM/coalesce-subregs.ll
index fb0f4c67c9..dfb5b17306 100644
--- a/test/CodeGen/ARM/coalesce-subregs.ll
+++ b/test/CodeGen/ARM/coalesce-subregs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -mcpu=cortex-a9 -new-coalescer | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
 target triple = "thumbv7-apple-ios0.0.0"
 
@@ -66,3 +66,51 @@ do.end:                                           ; preds = %do.body
 
 declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8*, i32) nounwind readonly
 declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
+
+; CHECK: f3
+; This function has lane insertions that span basic blocks.
+; The trivial REG_SEQUENCE lowering can't handle that, but the coalescer can.
+;
+; void f3(float *p, float *q) {
+;   float32x2_t x;
+;   x[1] = p[3];
+;   if (q)
+;     x[0] = q[0] + q[1];
+;   else
+;     x[0] = p[2];
+;   vst1_f32(p+4, x);
+; }
+;
+; CHECK-NOT: vmov
+; CHECK-NOT: vorr
+define void @f3(float* %p, float* %q) nounwind ssp {
+entry:
+  %arrayidx = getelementptr inbounds float* %p, i32 3
+  %0 = load float* %arrayidx, align 4
+  %vecins = insertelement <2 x float> undef, float %0, i32 1
+  %tobool = icmp eq float* %q, null
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %1 = load float* %q, align 4
+  %arrayidx2 = getelementptr inbounds float* %q, i32 1
+  %2 = load float* %arrayidx2, align 4
+  %add = fadd float %1, %2
+  %vecins3 = insertelement <2 x float> %vecins, float %add, i32 0
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %arrayidx4 = getelementptr inbounds float* %p, i32 2
+  %3 = load float* %arrayidx4, align 4
+  %vecins5 = insertelement <2 x float> %vecins, float %3, i32 0
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %x.0 = phi <2 x float> [ %vecins3, %if.then ], [ %vecins5, %if.else ]
+  %add.ptr = getelementptr inbounds float* %p, i32 4
+  %4 = bitcast float* %add.ptr to i8*
+  tail call void @llvm.arm.neon.vst1.v2f32(i8* %4, <2 x float> %x.0, i32 4)
+  ret void
+}
+
+declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) nounwind
author	Jakob Stoklund Olesen <stoklund@2pi.dk>	2012-09-17 23:03:25 +0000
committer	Jakob Stoklund Olesen <stoklund@2pi.dk>	2012-09-17 23:03:25 +0000
commit	87f7864c6d81ae134335b8271ac12c937c81dffc (patch)
tree	775f7cdefe682b919193144641cebe372ce0326c /test
parent	98279e8d65fe5c86d0370b3e2a62f244985bec33 (diff)