aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAkira Hatanaka <ahatanaka@mips.com>2013-03-01 02:03:51 +0000
committerAkira Hatanaka <ahatanaka@mips.com>2013-03-01 02:03:51 +0000
commit1f0aca857b899b397a9d82bb21cb1ca819419a90 (patch)
treeacbc10c5017e0043d22ab37c56bbb66adbdd5f18
parent888e8fefd22550ccfa496c3c1e02bd2ac036263b (diff)
[mips] Add the capability to search delay slot filling instructions in
successor basic blocks. Currently this is off by default. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176329 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/Mips/MipsDelaySlotFiller.cpp335
-rw-r--r--test/CodeGen/Mips/brdelayslot.ll56
2 files changed, 359 insertions, 32 deletions
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index bd25f42d9b..bf6af970b6 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -14,12 +14,14 @@
#define DEBUG_TYPE "delay-slot-filler"
#include "Mips.h"
+#include "MipsInstrInfo.h"
#include "MipsTargetMachine.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
@@ -66,6 +68,24 @@ static cl::opt<bool> DisableBackwardSearch(
cl::Hidden);
namespace {
+ typedef MachineBasicBlock::iterator Iter;
+ typedef MachineBasicBlock::reverse_iterator ReverseIter;
+ typedef SmallDenseMap<MachineBasicBlock*, MachineInstr*, 2> BB2BrMap;
+
+ /// \brief A functor comparing edge weight of two blocks.
+ struct CmpWeight {
+ CmpWeight(const MachineBasicBlock &S,
+ const MachineBranchProbabilityInfo &P) : Src(S), Prob(P) {}
+
+ bool operator()(const MachineBasicBlock *Dst0,
+ const MachineBasicBlock *Dst1) const {
+ return Prob.getEdgeWeight(&Src, Dst0) < Prob.getEdgeWeight(&Src, Dst1);
+ }
+
+ const MachineBasicBlock &Src;
+ const MachineBranchProbabilityInfo &Prob;
+ };
+
class RegDefsUses {
public:
RegDefsUses(TargetMachine &TM);
@@ -74,6 +94,14 @@ namespace {
/// This function sets all caller-saved registers in Defs.
void setCallerSaved(const MachineInstr &MI);
+ /// This function sets all unallocatable registers in Defs.
+ void setUnallocatableRegs(const MachineFunction &MF);
+
+ /// Set bits in Uses corresponding to MBB's live-out registers except for
+ /// the registers that are live-in to SuccBB.
+ void addLiveOut(const MachineBasicBlock &MBB,
+ const MachineBasicBlock &SuccBB);
+
bool update(const MachineInstr &MI, unsigned Begin, unsigned End);
private:
@@ -90,14 +118,41 @@ namespace {
/// Base class for inspecting loads and stores.
class InspectMemInstr {
public:
- virtual bool hasHazard(const MachineInstr &MI) = 0;
+ InspectMemInstr(bool ForbidMemInstr_)
+ : OrigSeenLoad(false), OrigSeenStore(false), SeenLoad(false),
+ SeenStore(false), ForbidMemInstr(ForbidMemInstr_) {}
+
+ /// Return true if MI cannot be moved to delay slot.
+ bool hasHazard(const MachineInstr &MI);
+
virtual ~InspectMemInstr() {}
+
+ protected:
+ /// Flags indicating whether loads or stores have been seen.
+ bool OrigSeenLoad, OrigSeenStore, SeenLoad, SeenStore;
+
+ /// Memory instructions are not allowed to move to delay slot if this flag
+ /// is true.
+ bool ForbidMemInstr;
+
+ private:
+ virtual bool hasHazard_(const MachineInstr &MI) = 0;
};
/// This subclass rejects any memory instructions.
class NoMemInstr : public InspectMemInstr {
public:
- virtual bool hasHazard(const MachineInstr &MI);
+ NoMemInstr() : InspectMemInstr(true) {}
+ private:
+ virtual bool hasHazard_(const MachineInstr &MI) { return true; }
+ };
+
+ /// This subclass accepts loads from stacks and constant loads.
+ class LoadFromStackOrConst : public InspectMemInstr {
+ public:
+ LoadFromStackOrConst() : InspectMemInstr(false) {}
+ private:
+ virtual bool hasHazard_(const MachineInstr &MI);
};
/// This subclass uses memory dependence information to determine whether a
@@ -106,10 +161,9 @@ namespace {
public:
MemDefsUses(const MachineFrameInfo *MFI);
- /// Return true if MI cannot be moved to delay slot.
- virtual bool hasHazard(const MachineInstr &MI);
-
private:
+ virtual bool hasHazard_(const MachineInstr &MI);
+
/// Update Defs and Uses. Return true if there exist dependences that
/// disqualify the delay slot candidate between V and values in Uses and Defs.
bool updateDefsUses(const Value *V, bool MayStore);
@@ -121,16 +175,9 @@ namespace {
const MachineFrameInfo *MFI;
SmallPtrSet<const Value*, 4> Uses, Defs;
- /// Flags indicating whether loads or stores have been seen.
- bool SeenLoad, SeenStore;
-
/// Flags indicating whether loads or stores with no underlying objects have
/// been seen.
bool SeenNoObjLoad, SeenNoObjStore;
-
- /// Memory instructions are not allowed to move to delay slot if this flag
- /// is true.
- bool ForbidMemInstr;
};
class Filler : public MachineFunctionPass {
@@ -153,10 +200,12 @@ namespace {
return Changed;
}
- private:
- typedef MachineBasicBlock::iterator Iter;
- typedef MachineBasicBlock::reverse_iterator ReverseIter;
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ private:
bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
/// This function checks if it is valid to move Candidate to the delay slot
@@ -179,6 +228,26 @@ namespace {
/// that can be moved to the delay slot. Returns true on success.
bool searchForward(MachineBasicBlock &MBB, Iter Slot) const;
+ /// This function searches MBB's successor blocks for an instruction that
+ /// can be moved to the delay slot and inserts clones of the instruction into
+ /// the successor blocks.
+ bool searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const;
+
+ /// Pick a successor block of MBB. Return NULL if MBB doesn't have a successor
+ /// block that is not a landing pad.
+ MachineBasicBlock *selectSuccBB(MachineBasicBlock &B) const;
+
+ /// This function analyzes MBB and returns an instruction with an unoccupied
+ /// slot that branches to Dst.
+ std::pair<MipsInstrInfo::BranchType, MachineInstr *>
+ getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const;
+
+ /// Examine Pred and see if it is possible to insert an instruction into
+ /// one of its branches delay slot or its end.
+ bool examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ,
+ RegDefsUses &RegDU, bool &HasMultipleSuccs,
+ BB2BrMap &BrMap) const;
+
bool terminateSearch(const MachineInstr &Candidate) const;
TargetMachine &TM;
@@ -189,6 +258,45 @@ namespace {
char Filler::ID = 0;
} // end of anonymous namespace
+static bool hasUnoccupiedSlot(const MachineInstr *MI) {
+ return MI->hasDelaySlot() && !MI->isBundledWithSucc();
+}
+
+/// This function inserts clones of Filler into predecessor blocks.
+static void insertDelayFiller(Iter Filler, const BB2BrMap &BrMap) {
+ MachineFunction *MF = Filler->getParent()->getParent();
+
+ for (BB2BrMap::const_iterator I = BrMap.begin(); I != BrMap.end(); ++I) {
+ if (I->second) {
+ MIBundleBuilder(I->second).append(MF->CloneMachineInstr(&*Filler));
+ ++UsefulSlots;
+ } else {
+ I->first->insert(I->first->end(), MF->CloneMachineInstr(&*Filler));
+ }
+ }
+}
+
+/// This function adds registers Filler defines to MBB's live-in register list.
+static void addLiveInRegs(Iter Filler, MachineBasicBlock &MBB) {
+ for (unsigned I = 0, E = Filler->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = Filler->getOperand(I);
+ unsigned R;
+
+ if (!MO.isReg() || !MO.isDef() || !(R = MO.getReg()))
+ continue;
+
+#ifndef NDEBUG
+ const MachineFunction &MF = *MBB.getParent();
+ assert(MF.getTarget().getRegisterInfo()->getAllocatableSet(MF).test(R) &&
+ "Shouldn't move an instruction with unallocatable registers across "
+ "basic block boundaries.");
+#endif
+
+ if (!MBB.isLiveIn(R))
+ MBB.addLiveIn(R);
+ }
+}
+
RegDefsUses::RegDefsUses(TargetMachine &TM)
: TRI(*TM.getRegisterInfo()), Defs(TRI.getNumRegs(), false),
Uses(TRI.getNumRegs(), false) {}
@@ -226,6 +334,29 @@ void RegDefsUses::setCallerSaved(const MachineInstr &MI) {
Defs |= CallerSavedRegs;
}
+void RegDefsUses::setUnallocatableRegs(const MachineFunction &MF) {
+ BitVector AllocSet = TRI.getAllocatableSet(MF);
+
+ for (int R = AllocSet.find_first(); R != -1; R = AllocSet.find_next(R))
+ for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI)
+ AllocSet.set(*AI);
+
+ AllocSet.set(Mips::ZERO);
+ AllocSet.set(Mips::ZERO_64);
+
+ Defs |= AllocSet.flip();
+}
+
+void RegDefsUses::addLiveOut(const MachineBasicBlock &MBB,
+ const MachineBasicBlock &SuccBB) {
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI)
+ if (*SI != &SuccBB)
+ for (MachineBasicBlock::livein_iterator LI = (*SI)->livein_begin(),
+ LE = (*SI)->livein_end(); LI != LE; ++LI)
+ Uses.set(*LI);
+}
+
bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) {
BitVector NewDefs(TRI.getNumRegs()), NewUses(TRI.getNumRegs());
bool HasHazard = false;
@@ -264,24 +395,15 @@ bool RegDefsUses::isRegInSet(const BitVector &RegSet, unsigned Reg) const {
return false;
}
-bool NoMemInstr::hasHazard(const MachineInstr &MI) {
- // Return true if MI accesses memory.
- return (MI.mayStore() || MI.mayLoad());
-}
-
-MemDefsUses::MemDefsUses(const MachineFrameInfo *MFI_)
- : MFI(MFI_), SeenLoad(false), SeenStore(false), SeenNoObjLoad(false),
- SeenNoObjStore(false), ForbidMemInstr(false) {}
-
-bool MemDefsUses::hasHazard(const MachineInstr &MI) {
+bool InspectMemInstr::hasHazard(const MachineInstr &MI) {
if (!MI.mayStore() && !MI.mayLoad())
return false;
if (ForbidMemInstr)
return true;
- bool OrigSeenLoad = SeenLoad, OrigSeenStore = SeenStore;
-
+ OrigSeenLoad = SeenLoad;
+ OrigSeenStore = SeenStore;
SeenLoad |= MI.mayLoad();
SeenStore |= MI.mayStore();
@@ -292,6 +414,33 @@ bool MemDefsUses::hasHazard(const MachineInstr &MI) {
return true;
}
+ return hasHazard_(MI);
+}
+
+bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) {
+ if (MI.mayStore())
+ return true;
+
+ if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getValue())
+ return true;
+
+ const Value *V = (*MI.memoperands_begin())->getValue();
+
+ if (isa<FixedStackPseudoSourceValue>(V))
+ return false;
+
+ if (const PseudoSourceValue *PSV = dyn_cast<const PseudoSourceValue>(V))
+ return !PSV->PseudoSourceValue::isConstant(0) &&
+ (V != PseudoSourceValue::getStack());
+
+ return true;
+}
+
+MemDefsUses::MemDefsUses(const MachineFrameInfo *MFI_)
+ : InspectMemInstr(false), MFI(MFI_), SeenNoObjLoad(false),
+ SeenNoObjStore(false) {}
+
+bool MemDefsUses::hasHazard_(const MachineInstr &MI) {
bool HasHazard = false;
SmallVector<const Value *, 4> Objs;
@@ -353,16 +502,24 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
for (Iter I = MBB.begin(); I != MBB.end(); ++I) {
- if (!I->hasDelaySlot())
+ if (!hasUnoccupiedSlot(&*I))
continue;
++FilledSlots;
Changed = true;
// Delay slot filling is disabled at -O0.
- if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None) &&
- (searchBackward(MBB, I) || searchForward(MBB, I)))
- continue;
+ if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None)) {
+ if (searchBackward(MBB, I))
+ continue;
+
+ if (I->isTerminator()) {
+ if (searchSuccBBs(MBB, I))
+ continue;
+ } else if (searchForward(MBB, I)) {
+ continue;
+ }
+ }
// Bundle the NOP to the instruction with the delay slot.
BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
@@ -404,6 +561,9 @@ bool Filler::searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
}
bool Filler::searchBackward(MachineBasicBlock &MBB, Iter Slot) const {
+ if (DisableBackwardSearch)
+ return false;
+
RegDefsUses RegDU(TM);
MemDefsUses MemDU(MBB.getParent()->getFrameInfo());
ReverseIter Filler;
@@ -422,7 +582,7 @@ bool Filler::searchBackward(MachineBasicBlock &MBB, Iter Slot) const {
bool Filler::searchForward(MachineBasicBlock &MBB, Iter Slot) const {
// Can handle only calls.
- if (!Slot->isCall())
+ if (DisableForwardSearch || !Slot->isCall())
return false;
RegDefsUses RegDU(TM);
@@ -441,6 +601,117 @@ bool Filler::searchForward(MachineBasicBlock &MBB, Iter Slot) const {
return false;
}
+bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {
+ if (DisableSuccBBSearch)
+ return false;
+
+ MachineBasicBlock *SuccBB = selectSuccBB(MBB);
+
+ if (!SuccBB)
+ return false;
+
+ RegDefsUses RegDU(TM);
+ bool HasMultipleSuccs = false;
+ BB2BrMap BrMap;
+ OwningPtr<InspectMemInstr> IM;
+ Iter Filler;
+
+ // Iterate over SuccBB's predecessor list.
+ for (MachineBasicBlock::pred_iterator PI = SuccBB->pred_begin(),
+ PE = SuccBB->pred_end(); PI != PE; ++PI)
+ if (!examinePred(**PI, *SuccBB, RegDU, HasMultipleSuccs, BrMap))
+ return false;
+
+ // Do not allow moving instructions which have unallocatable register operands
+ // across basic block boundaries.
+ RegDU.setUnallocatableRegs(*MBB.getParent());
+
+ // Only allow moving loads from stack or constants if any of the SuccBB's
+ // predecessors have multiple successors.
+ if (HasMultipleSuccs) {
+ IM.reset(new LoadFromStackOrConst());
+ } else {
+ const MachineFrameInfo *MFI = MBB.getParent()->getFrameInfo();
+ IM.reset(new MemDefsUses(MFI));
+ }
+
+ if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Filler))
+ return false;
+
+ insertDelayFiller(Filler, BrMap);
+ addLiveInRegs(Filler, *SuccBB);
+ Filler->eraseFromParent();
+
+ return true;
+}
+
+MachineBasicBlock *Filler::selectSuccBB(MachineBasicBlock &B) const {
+ if (B.succ_empty())
+ return NULL;
+
+ // Select the successor with the larget edge weight.
+ CmpWeight Cmp(B, getAnalysis<MachineBranchProbabilityInfo>());
+ MachineBasicBlock *S = *std::max_element(B.succ_begin(), B.succ_end(), Cmp);
+ return S->isLandingPad() ? NULL : S;
+}
+
+std::pair<MipsInstrInfo::BranchType, MachineInstr *>
+Filler::getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const {
+ const MipsInstrInfo *TII =
+ static_cast<const MipsInstrInfo*>(TM.getInstrInfo());
+ MachineBasicBlock *TrueBB = 0, *FalseBB = 0;
+ SmallVector<MachineInstr*, 2> BranchInstrs;
+ SmallVector<MachineOperand, 2> Cond;
+
+ MipsInstrInfo::BranchType R =
+ TII->AnalyzeBranch(MBB, TrueBB, FalseBB, Cond, false, BranchInstrs);
+
+ if ((R == MipsInstrInfo::BT_None) || (R == MipsInstrInfo::BT_NoBranch))
+ return std::make_pair(R, (MachineInstr*)NULL);
+
+ if (R != MipsInstrInfo::BT_CondUncond) {
+ if (!hasUnoccupiedSlot(BranchInstrs[0]))
+ return std::make_pair(MipsInstrInfo::BT_None, (MachineInstr*)NULL);
+
+ assert(((R != MipsInstrInfo::BT_Uncond) || (TrueBB == &Dst)));
+
+ return std::make_pair(R, BranchInstrs[0]);
+ }
+
+ assert((TrueBB == &Dst) || (FalseBB == &Dst));
+
+ // Examine the conditional branch. See if its slot is occupied.
+ if (hasUnoccupiedSlot(BranchInstrs[0]))
+ return std::make_pair(MipsInstrInfo::BT_Cond, BranchInstrs[0]);
+
+ // If that fails, try the unconditional branch.
+ if (hasUnoccupiedSlot(BranchInstrs[1]) && (FalseBB == &Dst))
+ return std::make_pair(MipsInstrInfo::BT_Uncond, BranchInstrs[1]);
+
+ return std::make_pair(MipsInstrInfo::BT_None, (MachineInstr*)NULL);
+}
+
+bool Filler::examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ,
+ RegDefsUses &RegDU, bool &HasMultipleSuccs,
+ BB2BrMap &BrMap) const {
+ std::pair<MipsInstrInfo::BranchType, MachineInstr *> P =
+ getBranch(Pred, Succ);
+
+ // Return if either getBranch wasn't able to analyze the branches or there
+ // were no branches with unoccupied slots.
+ if (P.first == MipsInstrInfo::BT_None)
+ return false;
+
+ if ((P.first != MipsInstrInfo::BT_Uncond) &&
+ (P.first != MipsInstrInfo::BT_NoBranch)) {
+ HasMultipleSuccs = true;
+ RegDU.addLiveOut(Pred, Succ);
+ }
+
+ BrMap[&Pred] = P.second;
+ return true;
+}
+
bool Filler::delayHasHazard(const MachineInstr &Candidate, RegDefsUses &RegDU,
InspectMemInstr &IM) const {
bool HasHazard = (Candidate.isImplicitDef() || Candidate.isKill());
diff --git a/test/CodeGen/Mips/brdelayslot.ll b/test/CodeGen/Mips/brdelayslot.ll
index 5e51abe8fe..9685e7b95d 100644
--- a/test/CodeGen/Mips/brdelayslot.ll
+++ b/test/CodeGen/Mips/brdelayslot.ll
@@ -4,6 +4,9 @@
; RUN: FileCheck %s -check-prefix=STATICO1
; RUN: llc -march=mipsel -disable-mips-df-forward-search=false \
; RUN: -relocation-model=static < %s | FileCheck %s -check-prefix=FORWARD
+; RUN: llc -march=mipsel -disable-mips-df-backward-search \
+; RUN: -disable-mips-df-succbb-search=false < %s | \
+; RUN: FileCheck %s -check-prefix=SUCCBB
define void @foo1() nounwind {
entry:
@@ -75,6 +78,7 @@ if.end:
;
; Default: foo6:
; Default-NOT: nop
+; Default: .end foo6
define void @foo6(float %a0, double %a1) nounwind {
entry:
@@ -109,6 +113,7 @@ entry:
; FORWARD: jal foo11
; FORWARD: jal foo11
; FORWARD-NOT: nop
+; FORWARD: end foo10
define void @foo10() nounwind {
entry:
@@ -121,3 +126,54 @@ entry:
}
declare void @foo11()
+
+; Check that delay slots of branches in both the entry block and loop body are
+; filled.
+;
+; SUCCBB: succbbs_loop1:
+; SUCCBB: bne ${{[0-9]+}}, $zero, $BB
+; SUCCBB-NEXT: addiu
+; SUCCBB: bne ${{[0-9]+}}, $zero, $BB
+; SUCCBB-NEXT: addiu
+
+define i32 @succbbs_loop1(i32* nocapture %a, i32 %n) {
+entry:
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %s.06 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i32 %i.05
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %s.06
+ %inc = add nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %s.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %s.0.lcssa
+}
+
+; Check that the first branch has its slot filled.
+;
+; SUCCBB: succbbs_br1:
+; SUCCBB: beq ${{[0-9]+}}, $zero, $BB
+; SUCCBB-NEXT: lw $25, %call16(foo100)
+
+define void @succbbs_br1(i32 %a) {
+entry:
+ %tobool = icmp eq i32 %a, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @foo100() #1
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+declare void @foo100()
+