aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBob Wilson <bob.wilson@apple.com>2010-02-12 01:30:21 +0000
committerBob Wilson <bob.wilson@apple.com>2010-02-12 01:30:21 +0000
commitfe61fb1e1082c81653ed78efd6d471592a2e57ad (patch)
treedf0bb339b5cb79a8d22e66430b920e47fae100a5
parentccfa1db5380fd3089fc68f3ed9ccfe6a41d7a309 (diff)
Add a new pass on machine instructions to optimize away PHI cycles that
reduce down to a single value. InstCombine already does this transformation but DAG legalization may introduce new opportunities. This has turned out to be important for ARM where 64-bit values are split up during type legalization: InstCombine is not able to remove the PHI cycles on the 64-bit values but the separate 32-bit values can be optimized. I measured the compile time impact of this (running llc on 176.gcc) and it was not significant. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@95951 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/CodeGen/Passes.h4
-rw-r--r--lib/CodeGen/CMakeLists.txt1
-rw-r--r--lib/CodeGen/LLVMTargetMachine.cpp1
-rw-r--r--lib/CodeGen/OptimizePHIs.cpp141
-rw-r--r--test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll34
5 files changed, 181 insertions, 0 deletions
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 7e0da3f6a8..dbc73cbc54 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -174,6 +174,10 @@ namespace llvm {
/// optimization by increasing uses of extended values.
FunctionPass *createOptimizeExtsPass();
+ /// createOptimizePHIsPass - This pass optimizes machine instruction PHIs
+ /// to take advantage of opportunities created during DAG legalization.
+ FunctionPass *createOptimizePHIsPass();
+
/// createStackSlotColoringPass - This pass performs stack slot coloring.
FunctionPass *createStackSlotColoringPass(bool);
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 9fcbea9437..46d3268259 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -39,6 +39,7 @@ add_llvm_library(LLVMCodeGen
ObjectCodeEmitter.cpp
OcamlGC.cpp
OptimizeExts.cpp
+ OptimizePHIs.cpp
PHIElimination.cpp
Passes.cpp
PostRASchedulerList.cpp
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 40e015067f..3c12afaacf 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -299,6 +299,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
if (OptLevel != CodeGenOpt::None) {
PM.add(createOptimizeExtsPass());
+ PM.add(createOptimizePHIsPass());
if (!DisableMachineLICM)
PM.add(createMachineLICMPass());
if (!DisableMachineSink)
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
new file mode 100644
index 0000000000..5b3fa6a68a
--- /dev/null
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -0,0 +1,141 @@
+//===-- OptimizePHIs.cpp - Optimize machine instruction PHIs --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass optimizes machine instruction PHIs to take advantage of
+// opportunities created during DAG legalization.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "phi-opt"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumPHICycles, "Number of PHI cycles replaced");
+
+namespace {
+ class OptimizePHIs : public MachineFunctionPass {
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+
+ public:
+ static char ID; // Pass identification
+ OptimizePHIs() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ bool IsSingleValuePHICycle(const MachineInstr *MI, unsigned &SingleValReg,
+ SmallSet<unsigned, 16> &RegsInCycle);
+ bool ReplacePHICycles(MachineBasicBlock &MBB);
+ };
+}
+
+char OptimizePHIs::ID = 0;
+static RegisterPass<OptimizePHIs>
+X("opt-phis", "Optimize machine instruction PHIs");
+
+FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); }
+
+bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
+ MRI = &Fn.getRegInfo();
+ TII = Fn.getTarget().getInstrInfo();
+
+ // Find PHI cycles that can be replaced by a single value. InstCombine
+ // does this, but DAG legalization may introduce new opportunities, e.g.,
+ // when i64 values are split up for 32-bit targets.
+ bool Changed = false;
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ Changed |= ReplacePHICycles(*I);
+
+ return Changed;
+}
+
+/// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands
+/// are copies of SingleValReg, possibly via copies through other PHIs. If
+/// SingleValReg is zero on entry, it is set to the register with the single
+/// non-copy value. RegsInCycle is a set used to keep track of the PHIs that
+/// have been scanned.
+bool OptimizePHIs::IsSingleValuePHICycle(const MachineInstr *MI,
+ unsigned &SingleValReg,
+ SmallSet<unsigned, 16> &RegsInCycle) {
+ assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction");
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // See if we already saw this register.
+ if (!RegsInCycle.insert(DstReg))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (RegsInCycle.size() == 16)
+ return false;
+
+ // Scan the PHI operands.
+ for (unsigned i = 1; i != MI->getNumOperands(); i += 2) {
+ unsigned SrcReg = MI->getOperand(i).getReg();
+ if (SrcReg == DstReg)
+ continue;
+ const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+
+ // Skip over register-to-register moves.
+ unsigned MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx;
+ if (SrcMI &&
+ TII->isMoveInstr(*SrcMI, MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx) &&
+ SrcSubIdx == 0 && DstSubIdx == 0 &&
+ TargetRegisterInfo::isVirtualRegister(MvSrcReg))
+ SrcMI = MRI->getVRegDef(MvSrcReg);
+ if (!SrcMI)
+ return false;
+
+ if (SrcMI->isPHI()) {
+ if (!IsSingleValuePHICycle(SrcMI, SingleValReg, RegsInCycle))
+ return false;
+ } else {
+ // Fail if there is more than one non-phi/non-move register.
+ if (SingleValReg != 0)
+ return false;
+ SingleValReg = SrcReg;
+ }
+ }
+ return true;
+}
+
+/// ReplacePHICycles - Find PHI cycles that can be replaced by a single
+/// value and remove them.
+bool OptimizePHIs::ReplacePHICycles(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator
+ MII = MBB.begin(), E = MBB.end(); MII != E; ) {
+ MachineInstr *MI = &*MII++;
+ if (!MI->isPHI())
+ break;
+
+ unsigned SingleValReg = 0;
+ SmallSet<unsigned, 16> RegsInCycle;
+ if (IsSingleValuePHICycle(MI, SingleValReg, RegsInCycle) &&
+ SingleValReg != 0) {
+ MRI->replaceRegWith(MI->getOperand(0).getReg(), SingleValReg);
+ MI->eraseFromParent();
+ ++NumPHICycles;
+ Changed = true;
+ }
+ }
+ return Changed;
+}
diff --git a/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll b/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll
new file mode 100644
index 0000000000..997257885e
--- /dev/null
+++ b/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
+
+define arm_apcscc i32 @test(i32 %n) nounwind {
+; CHECK: test:
+; CHECK-NOT: mov
+; CHECK: return
+entry:
+ %0 = icmp eq i32 %n, 1 ; <i1> [#uses=1]
+ br i1 %0, label %return, label %bb.nph
+
+bb.nph: ; preds = %entry
+ %tmp = add i32 %n, -1 ; <i32> [#uses=1]
+ br label %bb
+
+bb: ; preds = %bb.nph, %bb
+ %indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i32> [#uses=1]
+ %u.05 = phi i64 [ undef, %bb.nph ], [ %ins, %bb ] ; <i64> [#uses=1]
+ %1 = tail call arm_apcscc i32 @f() nounwind ; <i32> [#uses=1]
+ %tmp4 = zext i32 %1 to i64 ; <i64> [#uses=1]
+ %mask = and i64 %u.05, -4294967296 ; <i64> [#uses=1]
+ %ins = or i64 %tmp4, %mask ; <i64> [#uses=2]
+ tail call arm_apcscc void @g(i64 %ins) nounwind
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %indvar.next, %tmp ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret i32 undef
+}
+
+declare arm_apcscc i32 @f()
+
+declare arm_apcscc void @g(i64)