diff options
-rw-r--r-- | lib/CodeGen/MachineBlockPlacement.cpp | 42 | ||||
-rw-r--r-- | test/CodeGen/X86/block-placement.ll | 71 |
2 files changed, 108 insertions, 5 deletions
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 6831c1b360..7700efc6f9 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -20,13 +20,14 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "block-placement2" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/DenseMap.h" @@ -35,6 +36,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" #include <algorithm> using namespace llvm; @@ -259,9 +261,15 @@ class MachineBlockPlacement : public MachineFunctionPass { /// \brief A handle to the function-wide block frequency pass. const MachineBlockFrequencyInfo *MBFI; + /// \brief A handle to the loop info. + const MachineLoopInfo *MLI; + /// \brief A handle to the target's instruction info. const TargetInstrInfo *TII; + /// \brief A handle to the target's lowering info. + const TargetLowering *TLI; + /// \brief A prioritized list of edges in the BB-graph. /// /// For each function, we insert all control flow edges between BBs, along @@ -307,6 +315,7 @@ class MachineBlockPlacement : public MachineFunctionPass { void BuildBlockChains(); void PrioritizeChains(MachineFunction &F); void PlaceBlockChains(MachineFunction &F); + void AlignLoops(MachineFunction &F); public: static char ID; // Pass identification, replacement for typeid @@ -319,6 +328,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineBranchProbabilityInfo>(); AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineLoopInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -331,6 +341,7 @@ INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2", "Branch Probability Basic Block Placement", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2", "Branch Probability Basic Block Placement", false, false) @@ -595,6 +606,28 @@ void MachineBlockPlacement::PlaceBlockChains(MachineFunction &F) { } } +/// \brief Recursive helper to align a loop and any nested loops. +static void AlignLoop(MachineFunction &F, MachineLoop *L, unsigned Align) { + // Recurse through nested loops. + for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) + AlignLoop(F, *I, Align); + + L->getTopBlock()->setAlignment(Align); +} + +/// \brief Align loop headers to target preferred alignments. +void MachineBlockPlacement::AlignLoops(MachineFunction &F) { + if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + return; + + unsigned Align = TLI->getPrefLoopAlignment(); + if (!Align) + return; // Don't care about loop alignment. + + for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I) + AlignLoop(F, *I, Align); +} + bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { // Check for single-block functions and skip them. if (llvm::next(F.begin()) == F.end()) @@ -602,7 +635,9 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + MLI = &getAnalysis<MachineLoopInfo>(); TII = F.getTarget().getInstrInfo(); + TLI = F.getTarget().getTargetLowering(); assert(Edges.empty()); assert(BlockToChain.empty()); assert(PChains.empty()); @@ -612,6 +647,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { BuildBlockChains(); PrioritizeChains(F); PlaceBlockChains(F); + AlignLoops(F); Edges.clear(); BlockToChain.clear(); diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll index c7ab6ef821..887a62eabd 100644 --- a/test/CodeGen/X86/block-placement.ll +++ b/test/CodeGen/X86/block-placement.ll @@ -2,10 +2,10 @@ declare void @error(i32 %i, i32 %a, i32 %b) -define i32 @test1(i32 %i, i32* %a, i32 %b) { +define i32 @test_ifchains(i32 %i, i32* %a, i32 %b) { ; Test a chain of ifs, where the block guarded by the if is error handling code ; that is not expected to run. -; CHECK: test1: +; CHECK: test_ifchains: ; CHECK: %entry ; CHECK: %else1 ; CHECK: %else2 @@ -73,3 +73,70 @@ exit: } !0 = metadata !{metadata !"branch_weights", i32 4, i32 64} + +define i32 @test_loop_align(i32 %i, i32* %a) { +; Check that we provide basic loop body alignment with the block placement +; pass. +; CHECK: test_loop_align: +; CHECK: %entry +; CHECK: .align 16, +; CHECK-NEXT: %body +; CHECK: %exit + +entry: + br label %body + +body: + %iv = phi i32 [ 0, %entry ], [ %next, %body ] + %base = phi i32 [ 0, %entry ], [ %sum, %body ] + %arrayidx = getelementptr inbounds i32* %a, i32 %iv + %0 = load i32* %arrayidx + %sum = add nsw i32 %0, %base + %next = add i32 %iv, 1 + %exitcond = icmp eq i32 %next, %i + br i1 %exitcond, label %exit, label %body + +exit: + ret i32 %sum +} + +define i32 @test_nested_loop_align(i32 %i, i32* %a, i32* %b) { +; Check that we provide nested loop body alignment. +; CHECK: test_nested_loop_align: +; CHECK: %entry +; CHECK: .align 16, +; CHECK-NEXT: %loop.body.1 +; CHECK: .align 16, +; CHECK-NEXT: %inner.loop.body +; CHECK-NOT: .align +; CHECK: %loop.body.2 +; CHECK: %exit + +entry: + br label %loop.body.1 + +loop.body.1: + %iv = phi i32 [ 0, %entry ], [ %next, %loop.body.2 ] + %arrayidx = getelementptr inbounds i32* %a, i32 %iv + %bidx = load i32* %arrayidx + br label %inner.loop.body + +inner.loop.body: + %inner.iv = phi i32 [ 0, %loop.body.1 ], [ %inner.next, %inner.loop.body ] + %base = phi i32 [ 0, %loop.body.1 ], [ %sum, %inner.loop.body ] + %scaled_idx = mul i32 %bidx, %iv + %inner.arrayidx = getelementptr inbounds i32* %b, i32 %scaled_idx + %0 = load i32* %inner.arrayidx + %sum = add nsw i32 %0, %base + %inner.next = add i32 %iv, 1 + %inner.exitcond = icmp eq i32 %inner.next, %i + br i1 %inner.exitcond, label %loop.body.2, label %inner.loop.body + +loop.body.2: + %next = add i32 %iv, 1 + %exitcond = icmp eq i32 %next, %i + br i1 %exitcond, label %exit, label %loop.body.1 + +exit: + ret i32 %sum +} |