From a548afc98fd4c61a8dfdd550ba57c37f2cfe3ed9 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Tue, 19 Mar 2013 18:51:05 +0000 Subject: Prepare to make r0 an allocatable register on PPC Currently the PPC r0 register is unconditionally reserved. There are two reasons for this: 1. r0 is treated specially (as the constant 0) by certain instructions, and so cannot be used with those instructions as a regular register. 2. r0 is used as a temporary register in the CR-register spilling process (where, under some circumstances, we require two GPRs). This change addresses the first reason by introducing a restricted register class (without r0) for use by those instructions that treat r0 specially. These register classes have a new pseudo-register, ZERO, which represents the r0-as-0 use. This has the side benefit of making the existing target code simpler (and easier to understand), and will make it clear to the register allocator that uses of r0 as 0 don't conflict with real uses of the r0 register. Once the CR spilling code is improved, we'll be able to allocate r0. Adding these extra register classes, for some reason unclear to me, causes requests to the target to copy 32-bit registers to 64-bit registers. The resulting code seems correct (and causes no test-suite failures), and the new test case covers this new kind of asymmetric copy. As r0 is still reserved, no functionality change intended. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177423 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/PowerPC/asym-regclass-copy.ll | 53 ++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 test/CodeGen/PowerPC/asym-regclass-copy.ll (limited to 'test/CodeGen/PowerPC/asym-regclass-copy.ll') diff --git a/test/CodeGen/PowerPC/asym-regclass-copy.ll b/test/CodeGen/PowerPC/asym-regclass-copy.ll new file mode 100644 index 0000000000..cdfbcbc0f1 --- /dev/null +++ b/test/CodeGen/PowerPC/asym-regclass-copy.ll @@ -0,0 +1,53 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@gen_random.last = external unnamed_addr global i64, align 8 +@.str = external unnamed_addr constant [4 x i8], align 1 + +declare double @gen_random(double) #0 + +declare void @benchmark_heapsort(i32 signext, double* nocapture) #0 + +define signext i32 @main(i32 signext %argc, i8** nocapture %argv) #0 { +entry: + br i1 undef, label %cond.true, label %cond.end + +cond.true: ; preds = %entry + br label %cond.end + +cond.end: ; preds = %cond.true, %entry + %cond = phi i32 [ 0, %cond.true ], [ 8000000, %entry ] + %add = add i32 %cond, 1 + %conv = sext i32 %add to i64 + %mul = shl nsw i64 %conv, 3 + %call1 = tail call noalias i8* @malloc(i64 %mul) #1 + br i1 undef, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %cond.end + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + %indvars.iv = phi i64 [ 1, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %add + br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body + +for.cond.for.end_crit_edge: ; preds = %for.body + br label %for.end + 
+for.end: ; preds = %for.cond.for.end_crit_edge, %cond.end + ret i32 0 +} + +declare noalias i8* @malloc(i64) #0 + +declare signext i32 @printf(i8* nocapture, ...) #0 + +declare void @free(i8* nocapture) #0 + +declare i64 @strtol(i8*, i8** nocapture, i32 signext) #0 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } -- cgit v1.2.3-18-g5258 From 7ab1e60133b0545f682241e20b7969d221581ee3 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Tue, 19 Mar 2013 20:22:32 +0000 Subject: Add a comment to the CodeGen/PowerPC/asym-regclass-copy.ll test git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177434 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/PowerPC/asym-regclass-copy.ll | 2 ++ 1 file changed, 2 insertions(+) (limited to 'test/CodeGen/PowerPC/asym-regclass-copy.ll') diff --git a/test/CodeGen/PowerPC/asym-regclass-copy.ll b/test/CodeGen/PowerPC/asym-regclass-copy.ll index cdfbcbc0f1..c399802a1e 100644 --- a/test/CodeGen/PowerPC/asym-regclass-copy.ll +++ b/test/CodeGen/PowerPC/asym-regclass-copy.ll @@ -2,6 +2,8 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" +; This test triggers the use of the asymmetric OR8_32 copy pattern. + @gen_random.last = external unnamed_addr global i64, align 8 @.str = external unnamed_addr constant [4 x i8], align 1 -- cgit v1.2.3-18-g5258 From 3ea1b064a0b9c3d161b0f77a9e957970f98907ab Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 21 Mar 2013 23:23:34 +0000 Subject: Fix a register-class comparison bug in PPCCTRLoops Thanks to Jakob for isolating the underlying problem from the test case in r177423. 
The original commit had introduced asymmetric copy operations, but these turned out to be a work-around to the real problem (the use of == instead of hasSubClassEq in PPCCTRLoops). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177679 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/PowerPC/asym-regclass-copy.ll | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'test/CodeGen/PowerPC/asym-regclass-copy.ll') diff --git a/test/CodeGen/PowerPC/asym-regclass-copy.ll b/test/CodeGen/PowerPC/asym-regclass-copy.ll index c399802a1e..d04a6c98ee 100644 --- a/test/CodeGen/PowerPC/asym-regclass-copy.ll +++ b/test/CodeGen/PowerPC/asym-regclass-copy.ll @@ -2,7 +2,8 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" -; This test triggers the use of the asymmetric OR8_32 copy pattern. +; This tests that the GPRC/GPRC_NOR0 intersection subclass relationship with +; GPRC is handled correctly. When it was not, this test would assert. @gen_random.last = external unnamed_addr global i64, align 8 @.str = external unnamed_addr constant [4 x i8], align 1 -- cgit v1.2.3-18-g5258