diff options
-rw-r--r-- | lib/CodeGen/SimpleRegisterCoalescing.cpp | 4 | ||||
-rw-r--r-- | test/CodeGen/X86/2009-03-11-CoalescerBug.ll | 85 | ||||
-rw-r--r-- | test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll | 22 | ||||
-rw-r--r-- | test/CodeGen/X86/fold-pcmpeqd-2.ll | 17 |
4 files changed, 37 insertions, 91 deletions
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 74898c210a..8a102702ef 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -1038,9 +1038,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg()); unsigned Threshold = allocatableRCRegs_[RC].count() * 2; unsigned Length = li_->getApproximateInstructionCount(JoinVInt); - if (Length > Threshold && - std::distance(mri_->use_nodbg_begin(CP.getSrcReg()), - mri_->use_nodbg_end()) * Threshold < Length) { + if (Length > Threshold) { // Before giving up coalescing, if definition of source is defined by // trivial computation, try rematerializing it. if (!CP.isFlipped() && diff --git a/test/CodeGen/X86/2009-03-11-CoalescerBug.ll b/test/CodeGen/X86/2009-03-11-CoalescerBug.ll deleted file mode 100644 index d5ba93e104..0000000000 --- a/test/CodeGen/X86/2009-03-11-CoalescerBug.ll +++ /dev/null @@ -1,85 +0,0 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin9 -stats |& grep regcoalescing | grep commuting - -@lookupTable5B = external global [64 x i32], align 32 ; <[64 x i32]*> [#uses=1] -@lookupTable3B = external global [16 x i32], align 32 ; <[16 x i32]*> [#uses=1] -@disparity0 = external global i32 ; <i32*> [#uses=5] -@disparity1 = external global i32 ; <i32*> [#uses=3] - -define i32 @calc(i32 %theWord, i32 %k) nounwind { -entry: - %0 = lshr i32 %theWord, 3 ; <i32> [#uses=1] - %1 = and i32 %0, 31 ; <i32> [#uses=1] - %2 = shl i32 %k, 5 ; <i32> [#uses=1] - %3 = or i32 %1, %2 ; <i32> [#uses=1] - %4 = and i32 %theWord, 7 ; <i32> [#uses=1] - %5 = shl i32 %k, 3 ; <i32> [#uses=1] - %6 = or i32 %5, %4 ; <i32> [#uses=1] - %7 = getelementptr [64 x i32]* @lookupTable5B, i32 0, i32 %3 ; <i32*> [#uses=1] - %8 = load i32* %7, align 4 ; <i32> [#uses=5] - %9 = getelementptr [16 x i32]* @lookupTable3B, i32 0, i32 %6 ; <i32*> [#uses=1] - %10 = load i32* %9, align 4 ; <i32> [#uses=5] - %11 = and i32 %8, 65536 ; <i32> [#uses=1] - %12 = icmp eq i32 %11, 0 ; <i1> [#uses=1] - br i1 %12, label %bb1, label %bb - -bb: ; preds = %entry - %13 = and i32 %8, 994 ; <i32> [#uses=1] - %14 = load i32* @disparity0, align 4 ; <i32> [#uses=2] - store i32 %14, i32* @disparity1, align 4 - br label %bb8 - -bb1: ; preds = %entry - %15 = lshr i32 %8, 18 ; <i32> [#uses=1] - %16 = and i32 %15, 1 ; <i32> [#uses=1] - %17 = load i32* @disparity0, align 4 ; <i32> [#uses=4] - %18 = icmp eq i32 %16, %17 ; <i1> [#uses=1] - %not = select i1 %18, i32 0, i32 994 ; <i32> [#uses=1] - %.masked = and i32 %8, 994 ; <i32> [#uses=1] - %result.1 = xor i32 %not, %.masked ; <i32> [#uses=2] - %19 = and i32 %8, 524288 ; <i32> [#uses=1] - %20 = icmp eq i32 %19, 0 ; <i1> [#uses=1] - br i1 %20, label %bb7, label %bb6 - -bb6: ; preds = %bb1 - %21 = xor i32 %17, 1 ; <i32> [#uses=2] - store i32 %21, i32* @disparity1, align 4 - br label %bb8 - -bb7: ; preds = %bb1 - store i32 %17, i32* @disparity1, align 4 - br label %bb8 - -bb8: ; preds = %bb7, %bb6, %bb - %22 = phi i32 [ %17, %bb7 ], [ %21, %bb6 ], [ %14, %bb ] ; <i32> [#uses=4] - %result.0 = phi i32 [ %result.1, %bb7 ], [ %result.1, %bb6 ], [ %13, %bb ] ; <i32> [#uses=2] - %23 = and i32 %10, 65536 ; <i32> [#uses=1] - %24 = icmp eq i32 %23, 0 ; <i1> [#uses=1] - br i1 %24, label %bb10, label %bb9 - -bb9: ; preds = %bb8 - %25 = and i32 %10, 29 ; <i32> [#uses=1] - %26 = or i32 %result.0, %25 ; <i32> [#uses=1] - store i32 %22, i32* @disparity0, align 4 - ret i32 %26 - -bb10: ; preds = %bb8 - %27 = lshr i32 %10, 18 ; <i32> [#uses=1] - %28 = and i32 %27, 1 ; <i32> [#uses=1] - %29 = icmp eq i32 %28, %22 ; <i1> [#uses=1] - %not13 = select i1 %29, i32 0, i32 29 ; <i32> [#uses=1] - %.masked20 = and i32 %10, 29 ; <i32> [#uses=1] - %.pn = xor i32 %not13, %.masked20 ; <i32> [#uses=1] - %result.3 = or i32 %.pn, %result.0 ; <i32> [#uses=2] - %30 = and i32 %10, 524288 ; <i32> [#uses=1] - %31 = icmp eq i32 %30, 0 ; <i1> [#uses=1] - br i1 %31, label %bb17, label %bb16 - -bb16: ; preds = %bb10 - %32 = xor i32 %22, 1 ; <i32> [#uses=1] - store i32 %32, i32* @disparity0, align 4 - ret i32 %result.3 - -bb17: ; preds = %bb10 - store i32 %22, i32* @disparity0, align 4 - ret i32 %result.3 -} diff --git a/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll b/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll new file mode 100644 index 0000000000..e48edf7e30 --- /dev/null +++ b/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll @@ -0,0 +1,22 @@ +; RUN: llc -mcpu=yonah < %s +; PR9438 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-unknown-freebsd9.0" + +; The 'call fastcc' ties down %ebx, %ecx, and %edx. +; A MUL8r ties down %al, leaving no GR32_ABCD registers available. +; The coalescer can easily overallocate physical registers, +; and register allocation fails. + +declare fastcc i8* @save_string(i8* %d, i8* nocapture %s) nounwind + +define i32 @cvtchar(i8* nocapture %sp) nounwind { + %temp.i = alloca [2 x i8], align 1 + %tmp1 = load i8* %sp, align 1 + %div = udiv i8 %tmp1, 10 + %rem = urem i8 %div, 10 + %arrayidx.i = getelementptr inbounds [2 x i8]* %temp.i, i32 0, i32 0 + store i8 %rem, i8* %arrayidx.i, align 1 + %call.i = call fastcc i8* @save_string(i8* %sp, i8* %arrayidx.i) nounwind + ret i32 undef +} diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll index 49f879504e..c85a97a3fa 100644 --- a/test/CodeGen/X86/fold-pcmpeqd-2.ll +++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll @@ -1,10 +1,20 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | not grep pcmpeqd -; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1 +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -; This testcase should need to spill the -1 value on x86-32, +; This testcase should need to spill the -1 value on both x86-32 and x86-64, ; so it shouldn't use pcmpeqd to materialize an all-ones vector; it ; should use a constant-pool load instead. +; Constant pool all-ones vector: +; CHECK: .long 4294967295 +; CHECK-NEXT: .long 4294967295 +; CHECK-NEXT: .long 4294967295 +; CHECK-NEXT: .long 4294967295 + +; No pcmpeqd instructions, everybody uses the constant pool. +; CHECK: program_1: +; CHECK-NOT: pcmpeqd + %struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }> %struct._cl_image_format_t = type <{ i32, i32, i32 }> %struct._image2d_t = type <{ i8*, %struct._cl_image_format_t, i32, i32, i32, i32, i32, i32 }> @@ -57,6 +67,7 @@ forbody: ; preds = %forcond %bitcast11.i6 = bitcast <4 x float> %tmp83 to <4 x i32> ; <<4 x i32>> [#uses=1] %not.i7 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] %andnps.i8 = and <4 x i32> %bitcast11.i6, %not.i7 ; <<4 x i32>> [#uses=1] + call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind %orps.i9 = or <4 x i32> %andnps.i8, %andps.i5 ; <<4 x i32>> [#uses=1] %bitcast17.i10 = bitcast <4 x i32> %orps.i9 to <4 x float> ; <<4 x float>> [#uses=1] %tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] |