diff options
author | Chris Lattner <sabre@nondot.org> | 2010-02-24 06:11:37 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2010-02-24 06:11:37 +0000 |
commit | 736a6ea3a2a5322db0e09d97651a1acc07502e41 (patch) | |
tree | 96f50bc8c58bffd072902ecc8e0ea7e152ecab85 | |
parent | ff28103b192a7e818f92628c2a4e34e622c1a142 (diff) |
Change the scheduler from adding nodes in allnodes order
to adding them in a determinstic order (bottom up from
the root) based on the structure of the graph itself.
This updates tests for some random changes, interesting
bits: CodeGen/Blackfin/promote-logic.ll no longer crashes.
I have no idea why, but that's good right?
CodeGen/X86/2009-07-16-LoadFoldingBug.ll also fails, but
now compiles to have one fewer constant pool entry, making
the expected load that was being folded disappear. Since it
is an unreduced mass of gnast, I just removed it.
This fixes PR6370
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@97023 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 16 | ||||
-rw-r--r-- | test/CodeGen/Blackfin/promote-logic.ll | 3 | ||||
-rw-r--r-- | test/CodeGen/MSP430/Inst8rr.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/LargeAbsoluteAddr.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll | 102 |
5 files changed, 18 insertions, 109 deletions
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index b51c61bf6d..06e7b8c905 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -218,8 +218,20 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // Check to see if the scheduler cares about latencies. bool UnitLatencies = ForceUnitLatencies(); - for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), - E = DAG->allnodes_end(); NI != E; ++NI) { + // Add all nodes in depth first order. + SmallVector<SDNode*, 64> Worklist; + SmallPtrSet<SDNode*, 64> Visited; + Worklist.push_back(DAG->getRoot().getNode()); + Visited.insert(DAG->getRoot().getNode()); + + while (!Worklist.empty()) { + SDNode *NI = Worklist.pop_back_val(); + + // Add all operands to the worklist unless they've already been added. + for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i) + if (Visited.insert(NI->getOperand(i).getNode())) + Worklist.push_back(NI->getOperand(i).getNode()); + if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. continue; diff --git a/test/CodeGen/Blackfin/promote-logic.ll b/test/CodeGen/Blackfin/promote-logic.ll index 46da56681d..1ac1408290 100644 --- a/test/CodeGen/Blackfin/promote-logic.ll +++ b/test/CodeGen/Blackfin/promote-logic.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -march=bfin > %t -; XFAIL: * +; RUN: llc < %s -march=bfin ; DAGCombiner::SimplifyBinOpWithSameOpcodeHands can produce an illegal i16 OR ; operation after LegalizeOps. diff --git a/test/CodeGen/MSP430/Inst8rr.ll b/test/CodeGen/MSP430/Inst8rr.ll index 74feaae4eb..0f5fc12b62 100644 --- a/test/CodeGen/MSP430/Inst8rr.ll +++ b/test/CodeGen/MSP430/Inst8rr.ll @@ -10,7 +10,7 @@ define i8 @mov(i8 %a, i8 %b) nounwind { define i8 @add(i8 %a, i8 %b) nounwind { ; CHECK: add: -; CHECK: add.b r14, r15 +; CHECK: add.b r12, r15 %1 = add i8 %a, %b ret i8 %1 } diff --git a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll index 75374abeb0..b10a996867 100644 --- a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll +++ b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | \ -; RUN: grep {stw r4, 32751} +; RUN: grep {stw r3, 32751} ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \ -; RUN: grep {stw r4, 32751} +; RUN: grep {stw r3, 32751} ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \ ; RUN: grep {std r3, 9024} diff --git a/test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll b/test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll deleted file mode 100644 index e21c8923df..0000000000 --- a/test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll +++ /dev/null @@ -1,102 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s - -; CHECK: _foo: -; CHECK: pavgw LCPI1_4(%rip) - -; rdar://7057804 - -define void @foo(i16* %out8x8, i16* %in8x8, i32 %lastrow) optsize ssp { -entry: - %0 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2] - %1 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %0, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] - %2 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] - %3 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %2, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] - %tmp.i.i10 = add <8 x i16> %0, %3 ; <<8 x i16>> [#uses=1] - %4 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> zeroinitializer, <8 x i16> %1) nounwind readnone ; <<8 x i16>> [#uses=1] - %5 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i10, <8 x i16> %4) nounwind readnone ; <<8 x i16>> [#uses=3] - %6 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] - %7 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2] - %8 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %7, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] - %9 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] - %10 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %9, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] - %tmp.i.i8 = add <8 x i16> %7, %10 ; <<8 x i16>> [#uses=1] - %11 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %8) nounwind readnone ; <<8 x i16>> [#uses=1] - %12 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i8, <8 x i16> %11) nounwind readnone ; <<8 x i16>> [#uses=1] - %13 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> undef, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] - %14 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] - %15 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] - %16 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %6, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] - %17 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %12, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] - %18 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %13, <8 x i16> %15) nounwind readnone ; <<8 x i16>> [#uses=1] - %19 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %14) nounwind readnone ; <<8 x i16>> [#uses=2] - %20 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=4] - %21 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %17) nounwind readnone ; <<8 x i16>> [#uses=1] - %22 = bitcast <8 x i16> %21 to <2 x i64> ; <<2 x i64>> [#uses=1] - %23 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2] - %24 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %23, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] - %25 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] - %26 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %25, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] - %tmp.i.i6 = add <8 x i16> %23, %26 ; <<8 x i16>> [#uses=1] - %27 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %24) nounwind readnone ; <<8 x i16>> [#uses=1] - %28 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i6, <8 x i16> %27) nounwind readnone ; <<8 x i16>> [#uses=1] - %29 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2] - %30 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %29, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] - %31 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] - %32 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %31, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] - %tmp.i.i4 = add <8 x i16> %29, %32 ; <<8 x i16>> [#uses=1] - %33 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %30) nounwind readnone ; <<8 x i16>> [#uses=1] - %34 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i4, <8 x i16> %33) nounwind readnone ; <<8 x i16>> [#uses=1] - %35 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170>, <8 x i16> %20) nounwind readnone ; <<8 x i16>> [#uses=2] - %tmp.i2.i1 = mul <8 x i16> %20, <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170> ; <<8 x i16>> [#uses=1] - %36 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %35, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] - %37 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %tmp.i2.i1, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] - %38 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %37, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] - %tmp.i.i2 = add <8 x i16> %35, %38 ; <<8 x i16>> [#uses=1] - %39 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %19, <8 x i16> %36) nounwind readnone ; <<8 x i16>> [#uses=1] - %40 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i2, <8 x i16> %39) nounwind readnone ; <<8 x i16>> [#uses=1] - %41 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170>, <8 x i16> %20) nounwind readnone ; <<8 x i16>> [#uses=2] - %tmp.i2.i = mul <8 x i16> %20, <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170> ; <<8 x i16>> [#uses=1] - %42 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %41, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] - %43 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %tmp.i2.i, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] - %44 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %43, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] - %tmp.i.i = add <8 x i16> %41, %44 ; <<8 x i16>> [#uses=1] - %45 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %19, <8 x i16> %42) nounwind readnone ; <<8 x i16>> [#uses=1] - %46 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i, <8 x i16> %45) nounwind readnone ; <<8 x i16>> [#uses=1] - %47 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %18, <8 x i16> %16) nounwind readnone ; <<8 x i16>> [#uses=1] - %48 = bitcast <8 x i16> %47 to <2 x i64> ; <<2 x i64>> [#uses=1] - %49 = bitcast <8 x i16> %28 to <2 x i64> ; <<2 x i64>> [#uses=1] - %50 = getelementptr i16* %out8x8, i64 8 ; <i16*> [#uses=1] - %51 = bitcast i16* %50 to <2 x i64>* ; <<2 x i64>*> [#uses=1] - store <2 x i64> %49, <2 x i64>* %51, align 16 - %52 = bitcast <8 x i16> %40 to <2 x i64> ; <<2 x i64>> [#uses=1] - %53 = getelementptr i16* %out8x8, i64 16 ; <i16*> [#uses=1] - %54 = bitcast i16* %53 to <2 x i64>* ; <<2 x i64>*> [#uses=1] - store <2 x i64> %52, <2 x i64>* %54, align 16 - %55 = getelementptr i16* %out8x8, i64 24 ; <i16*> [#uses=1] - %56 = bitcast i16* %55 to <2 x i64>* ; <<2 x i64>*> [#uses=1] - store <2 x i64> %48, <2 x i64>* %56, align 16 - %57 = bitcast <8 x i16> %46 to <2 x i64> ; <<2 x i64>> [#uses=1] - %58 = getelementptr i16* %out8x8, i64 40 ; <i16*> [#uses=1] - %59 = bitcast i16* %58 to <2 x i64>* ; <<2 x i64>*> [#uses=1] - store <2 x i64> %57, <2 x i64>* %59, align 16 - %60 = bitcast <8 x i16> %34 to <2 x i64> ; <<2 x i64>> [#uses=1] - %61 = getelementptr i16* %out8x8, i64 48 ; <i16*> [#uses=1] - %62 = bitcast i16* %61 to <2 x i64>* ; <<2 x i64>*> [#uses=1] - store <2 x i64> %60, <2 x i64>* %62, align 16 - %63 = getelementptr i16* %out8x8, i64 56 ; <i16*> [#uses=1] - %64 = bitcast i16* %63 to <2 x i64>* ; <<2 x i64>*> [#uses=1] - store <2 x i64> %22, <2 x i64>* %64, align 16 - ret void -} - -declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone - -declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone - -declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone - -declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone - -declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone - -declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone |