diff options
-rw-r--r-- | lib/CodeGen/StrongPHIElimination.cpp | 654 |
1 files changed, 590 insertions, 64 deletions
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index a0ad2fb0c0..fd6e52bd1f 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -1,4 +1,4 @@ -//===- StrongPhiElimination.cpp - Eliminate PHI nodes by inserting copies -===// +//===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===// // // The LLVM Compiler Infrastructure // @@ -7,6 +7,34 @@ // //===----------------------------------------------------------------------===// // +// This pass eliminates PHI instructions by aggressively coalescing the copies +// that would be inserted by a naive algorithm and only inserting the copies +// that are necessary. The coalescing technique initially assumes that all +// registers appearing in a PHI instruction do not interfere. It then eliminates +// proven interferences, using dominators to only perform a linear number of +// interference tests instead of the quadratic number of interference tests +// that this would naively require. This is a technique derived from: +// +// Budimlic, et al. Fast copy coalescing and live-range identification. +// In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language +// Design and Implementation (Berlin, Germany, June 17 - 19, 2002). +// PLDI '02. ACM, New York, NY, 25-32. +// +// The original implementation constructs a data structure they call a dominance +// forest for this purpose. The dominance forest was shown to be unnecessary, +// as it is possible to emulate the creation and traversal of a dominance forest +// by directly using the dominator tree, rather than actually constructing the +// dominance forest. This technique is explained in: +// +// Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code +// Quality and Efficiency, +// In Proceedings of the 7th annual IEEE/ACM International Symposium on Code +// Generation and Optimization (Seattle, Washington, March 22 - 25, 2009). +// CGO '09. IEEE, Washington, DC, 114-125. +// +// Careful implementation allows for all of the dominator forest interference +// checks to be performed at once in a single depth-first traversal of the +// dominator tree, which is what is implemented here. // //===----------------------------------------------------------------------===// @@ -34,14 +62,104 @@ namespace { bool runOnMachineFunction(MachineFunction&); private: + /// This struct represents a single node in the union-find data structure + /// representing the variable congruence classes. There is one difference + /// from a normal union-find data structure. We steal two bits from the parent + /// pointer . One of these bits is used to represent whether the register + /// itself has been isolated, and the other is used to represent whether the + /// PHI with that register as its destination has been isolated. + /// + /// Note that this leads to the strange situation where the leader of a + /// congruence class may no longer logically be a member, due to being + /// isolated. + struct Node { + enum Flags { + kRegisterIsolatedFlag = 1, + kPHIIsolatedFlag = 2 + }; + Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); } + + Node* getLeader(); + + PointerIntPair<Node*, 2> parent; + unsigned value; + unsigned rank; + }; + + /// Add a register in a new congruence class containing only itself. + void addReg(unsigned); + + /// Join the congruence classes of two registers. + void unionRegs(unsigned, unsigned); + + /// Get the color of a register. The color is 0 if the register has been + /// isolated. + unsigned getRegColor(unsigned); + + // Isolate a register. + void isolateReg(unsigned); + + /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been + /// isolated. Otherwise, it is the original color of its destination and + /// all of its operands (before they were isolated, if they were). + unsigned getPHIColor(MachineInstr*); + + /// Isolate a PHI. + void isolatePHI(MachineInstr*); + + void PartitionRegisters(MachineFunction& MF); + + /// Traverses a basic block, splitting any interferences found between + /// registers in the same congruence class. It takes two DenseMaps as + /// arguments that it also updates: CurrentDominatingParent, which maps + /// a color to the register in that congruence class whose definition was + /// most recently seen, and ImmediateDominatingParent, which maps a register + /// to the register in the same congruence class that most immediately + /// dominates it. + /// + /// This function assumes that it is being called in a depth-first traversal + /// of the dominator tree. + void SplitInterferencesForBasicBlock( + MachineBasicBlock&, + DenseMap<unsigned, unsigned>& CurrentDominatingParent, + DenseMap<unsigned, unsigned>& ImmediateDominatingParent); + + // Lowers a PHI instruction, inserting copies of the source and destination + // registers as necessary. void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*); + // Merges the live interval of Reg into NewReg and renames Reg to NewReg + // everywhere that Reg appears. Requires Reg and NewReg to have non- + // overlapping lifetimes. + void MergeLIsAndRename(unsigned Reg, unsigned NewReg); + MachineRegisterInfo* MRI; const TargetInstrInfo* TII; + MachineDominatorTree* DT; LiveIntervals* LI; - typedef DenseSet<std::pair<MachineBasicBlock*, unsigned> > CopySet; - CopySet InsertedCopies; + BumpPtrAllocator Allocator; + + DenseMap<unsigned, Node*> RegNodeMap; + + // FIXME: Can these two data structures be combined? Would a std::multimap + // be any better? + + // Stores pairs of predecessor basic blocks and the source registers of + // inserted copy instructions. + typedef DenseSet<std::pair<MachineBasicBlock*, unsigned> > SrcCopySet; + SrcCopySet InsertedSrcCopySet; + + // Maps pairs of predecessor basic blocks and colors to their defining copy + // instructions. + typedef DenseMap<std::pair<MachineBasicBlock*, unsigned>, MachineInstr*> + SrcCopyMap; + SrcCopyMap InsertedSrcCopyMap; + + // Maps inserted destination copy registers to their defining copy + // instructions. + typedef DenseMap<unsigned, MachineInstr*> DestCopyMap; + DestCopyMap InsertedDestCopies; }; } // namespace @@ -86,8 +204,22 @@ static MachineOperand* findLastUse(MachineBasicBlock* MBB, unsigned Reg) { bool StrongPHIElimination::runOnMachineFunction(MachineFunction& MF) { MRI = &MF.getRegInfo(); TII = MF.getTarget().getInstrInfo(); + DT = &getAnalysis<MachineDominatorTree>(); LI = &getAnalysis<LiveIntervals>(); + PartitionRegisters(MF); + + // Perform a depth-first traversal of the dominator tree, splitting + // interferences amongst PHI-congruence classes. + DenseMap<unsigned, unsigned> CurrentDominatingParent; + DenseMap<unsigned, unsigned> ImmediateDominatingParent; + for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()), + DE = df_end(DT->getRootNode()); DI != DE; ++DI) { + SplitInterferencesForBasicBlock(*DI->getBlock(), + CurrentDominatingParent, + ImmediateDominatingParent); + } + // Insert copies for all PHI source and destination registers. for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { @@ -97,13 +229,86 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction& MF) { } } + // FIXME: Preserve the equivalence classes during copy insertion and use + // the preversed equivalence classes instead of recomputing them. + RegNodeMap.clear(); + PartitionRegisters(MF); + + DenseMap<unsigned, unsigned> RegRenamingMap; + bool Changed = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); + while (BBI != BBE && BBI->isPHI()) { + MachineInstr* PHI = BBI; + + assert(PHI->getNumOperands() > 0); + + unsigned SrcReg = PHI->getOperand(1).getReg(); + unsigned SrcColor = getRegColor(SrcReg); + unsigned NewReg = RegRenamingMap[SrcColor]; + if (!NewReg) { + NewReg = SrcReg; + RegRenamingMap[SrcColor] = SrcReg; + } + MergeLIsAndRename(SrcReg, NewReg); + + unsigned DestReg = PHI->getOperand(0).getReg(); + if (!InsertedDestCopies.count(DestReg)) + MergeLIsAndRename(DestReg, NewReg); + + for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) { + unsigned SrcReg = PHI->getOperand(i).getReg(); + MergeLIsAndRename(SrcReg, NewReg); + } + + ++BBI; + LI->RemoveMachineInstrFromMaps(PHI); + PHI->eraseFromParent(); + Changed = true; + } + } + + // Due to the insertion of copies to split live ranges, the live intervals are + // guaranteed to not overlap, except in one case: an original PHI source and a + // PHI destination copy. In this case, they have the same value and thus don't + // truly intersect, so we merge them into the value live at that point. + // FIXME: Is there some better way we can handle this? + for (DestCopyMap::iterator I = InsertedDestCopies.begin(), + E = InsertedDestCopies.end(); I != E; ++I) { + unsigned DestReg = I->first; + unsigned DestColor = getRegColor(DestReg); + unsigned NewReg = RegRenamingMap[DestColor]; + + LiveInterval& DestLI = LI->getInterval(DestReg); + LiveInterval& NewLI = LI->getInterval(NewReg); + + assert(DestLI.containsOneValue()); + LiveRange* DestLR = DestLI.begin(); + VNInfo* NewVNI = NewLI.getVNInfoAt(DestLR->start); + if (!NewVNI) { + NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator()); + MachineInstr* CopyInstr = I->second; + CopyInstr->getOperand(1).setIsKill(true); + } + + LiveRange NewLR(DestLR->start, DestLR->end, NewVNI); + NewLI.addRange(NewLR); + + LI->removeInterval(DestReg); + MRI->replaceRegWith(DestReg, NewReg); + } + // Adjust the live intervals of all PHI source registers to handle the case // where the PHIs in successor blocks were the only later uses of the source // register. - for (CopySet::iterator I = InsertedCopies.begin(), E = InsertedCopies.end(); - I != E; ++I) { + for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(), + E = InsertedSrcCopySet.end(); I != E; ++I) { MachineBasicBlock* MBB = I->first; unsigned SrcReg = I->second; + if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)]) + SrcReg = RenamedRegister; + LiveInterval& SrcLI = LI->getInterval(SrcReg); bool isLiveOut = false; @@ -120,37 +325,376 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction& MF) { MachineOperand* LastUse = findLastUse(MBB, SrcReg); assert(LastUse); - SrcLI.removeRange(LI->getInstructionIndex(LastUse->getParent()).getDefIndex(), - LI->getMBBEndIdx(MBB)); + SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent()); + SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB)); LastUse->setIsKill(true); } - // Remove all PHI instructions from the function. - bool Changed = false; + LI->renumber(); + + Allocator.Reset(); + RegNodeMap.clear(); + InsertedSrcCopySet.clear(); + InsertedSrcCopyMap.clear(); + InsertedDestCopies.clear(); + + return Changed; +} + +void StrongPHIElimination::addReg(unsigned Reg) { + if (RegNodeMap.count(Reg)) + return; + RegNodeMap[Reg] = new (Allocator) Node(Reg); +} + +StrongPHIElimination::Node* +StrongPHIElimination::Node::getLeader() { + Node* parentPointer = parent.getPointer(); + if (parentPointer == this) + return this; + Node* newParent = parentPointer->getLeader(); + parent.setPointer(newParent); + return newParent; +} + +unsigned StrongPHIElimination::getRegColor(unsigned Reg) { + DenseMap<unsigned, Node*>::iterator RI = RegNodeMap.find(Reg); + if (RI == RegNodeMap.end()) + return 0; + Node* Node = RI->second; + if (Node->parent.getInt() & Node::kRegisterIsolatedFlag) + return 0; + return Node->getLeader()->value; +} + +void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) { + Node* Node1 = RegNodeMap[Reg1]->getLeader(); + Node* Node2 = RegNodeMap[Reg2]->getLeader(); + + if (Node1->rank > Node2->rank) { + Node2->parent.setPointer(Node1->getLeader()); + } else if (Node1->rank < Node2->rank) { + Node1->parent.setPointer(Node2->getLeader()); + } else if (Node1 != Node2) { + Node2->parent.setPointer(Node1->getLeader()); + Node1->rank++; + } +} + +void StrongPHIElimination::isolateReg(unsigned Reg) { + Node* Node = RegNodeMap[Reg]; + Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag); +} + +unsigned StrongPHIElimination::getPHIColor(MachineInstr* PHI) { + assert(PHI->isPHI()); + + unsigned DestReg = PHI->getOperand(0).getReg(); + Node* DestNode = RegNodeMap[DestReg]; + if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag) + return 0; + + for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) { + unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg()); + if (SrcColor) + return SrcColor; + } + return 0; +} + +void StrongPHIElimination::isolatePHI(MachineInstr* PHI) { + assert(PHI->isPHI()); + Node* Node = RegNodeMap[PHI->getOperand(0).getReg()]; + Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag); +} + +void StrongPHIElimination::PartitionRegisters(MachineFunction& MF) { for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); - while (BBI != BBE && BBI->isPHI()) { - MachineInstr* PHI = BBI; - ++BBI; - LI->RemoveMachineInstrFromMaps(PHI); - PHI->eraseFromParent(); - Changed = true; + for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + unsigned DestReg = BBI->getOperand(0).getReg(); + addReg(DestReg); + + for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) { + unsigned SrcReg = BBI->getOperand(i).getReg(); + addReg(SrcReg); + unionRegs(DestReg, SrcReg); + } } } +} - LI->renumber(); - MF.verify(this); +/// SplitInterferencesForBasicBlock - traverses a basic block, splitting any +/// interferences found between registers in the same congruence class. It +/// takes two DenseMaps as arguments that it also updates: +/// +/// 1) CurrentDominatingParent, which maps a color to the register in that +/// congruence class whose definition was most recently seen. +/// +/// 2) ImmediateDominatingParent, which maps a register to the register in the +/// same congruence class that most immediately dominates it. +/// +/// This function assumes that it is being called in a depth-first traversal +/// of the dominator tree. +/// +/// The algorithm used here is a generalization of the dominance-based SSA test +/// for two variables. If there are variables a_1, ..., a_n such that +/// +/// def(a_1) dom ... dom def(a_n), +/// +/// then we can test for an interference between any two a_i by only using O(n) +/// interference tests between pairs of variables. If i < j and a_i and a_j +/// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1). +/// Thus, in order to test for an interference involving a_i, we need only check +/// for a potential interference with a_i+1. +/// +/// This method can be generalized to arbitrary sets of variables by performing +/// a depth-first traversal of the dominator tree. As we traverse down a branch +/// of the dominator tree, we keep track of the current dominating variable and +/// only perform an interference test with that variable. However, when we go to +/// another branch of the dominator tree, the definition of the current dominating +/// variable may no longer dominate the current block. In order to correct this, +/// we need to use a stack of past choices of the current dominating variable +/// and pop from this stack until we find a variable whose definition actually +/// dominates the current block. +/// +/// There will be one push on this stack for each variable that has become the +/// current dominating variable, so instead of using an explicit stack we can +/// simply associate the previous choice for a current dominating variable with +/// the new choice. This works better in our implementation, where we test for +/// interference in multiple distinct sets at once. +void +StrongPHIElimination::SplitInterferencesForBasicBlock( + MachineBasicBlock& MBB, + DenseMap<unsigned, unsigned>& CurrentDominatingParent, + DenseMap<unsigned, unsigned>& ImmediateDominatingParent) { + for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end(); + BBI != BBE; ++BBI) { + for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = BBI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + + unsigned DestReg = MO.getReg(); + if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg)) + continue; + + // If the virtual register being defined is not used in any PHI or has + // already been isolated, then there are no more interferences to check. + unsigned DestColor = getRegColor(DestReg); + if (!DestColor) + continue; + + // The input to this pass sometimes is not in SSA form in every basic + // block, as some virtual registers have redefinitions. We could eliminate + // this by fixing the passes that generate the non-SSA code, or we could + // handle it here by tracking defining machine instructions rather than + // virtual registers. For now, we just handle the situation conservatively + // in a way that will possibly lead to false interferences. + unsigned NewParent = CurrentDominatingParent[DestColor]; + if (NewParent == DestReg) + continue; + + // Pop registers from the stack represented by ImmediateDominatingParent + // until we find a parent that dominates the current instruction. + while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), BBI) + || !getRegColor(NewParent))) + NewParent = ImmediateDominatingParent[NewParent]; + + // If NewParent is nonzero, then its definition dominates the current + // instruction, so it is only necessary to check for the liveness of + // NewParent in order to check for an interference. + if (NewParent + && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(BBI))) { + // If there is an interference, always isolate the new register. This + // could be improved by using a heuristic that decides which of the two + // registers to isolate. + isolateReg(DestReg); + CurrentDominatingParent[DestColor] = NewParent; + } else { + // If there is no interference, update ImmediateDominatingParent and set + // the CurrentDominatingParent for this color to the current register. + ImmediateDominatingParent[DestReg] = NewParent; + CurrentDominatingParent[DestColor] = DestReg; + } + } + } - InsertedCopies.clear(); + // We now walk the PHIs in successor blocks and check for interferences. This + // is necesary because the use of a PHI's operands are logically contained in + // the predecessor block. The def of a PHI's destination register is processed + // along with the other defs in a basic block. + + // The map CurrentPHIForColor maps a color to a pair of a MachineInstr* and a + // virtual register, which is the operand of that PHI corresponding to the + // current basic block. + // FIXME: This should use a container that doesn't always perform heap + // allocation. + DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > CurrentPHIForColor; + + for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(), + SE = MBB.succ_end(); SI != SE; ++SI) { + for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + MachineInstr* PHI = BBI; - return Changed; + // If a PHI is already isolated, either by being isolated directly or + // having all of its operands isolated, ignore it. + unsigned Color = getPHIColor(PHI); + if (!Color) + continue; + + // Find the index of the PHI operand that corresponds to this basic block. + unsigned PredIndex; + for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) { + if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB) + break; + } + assert(PredIndex < PHI->getNumOperands()); + unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg(); + + // Pop registers from the stack represented by ImmediateDominatingParent + // until we find a parent that dominates the current instruction. + unsigned NewParent = CurrentDominatingParent[Color]; + while (NewParent + && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB) + || !getRegColor(NewParent))) + NewParent = ImmediateDominatingParent[NewParent]; + CurrentDominatingParent[Color] = NewParent; + + // If there is an interference with a register, always isolate the + // register rather than the PHI. It is also possible to isolate the + // PHI, but that introduces copies for all of the registers involved + // in that PHI. + if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB) + && NewParent != PredOperandReg) + isolateReg(NewParent); + + std::pair<MachineInstr*, unsigned> CurrentPHI = CurrentPHIForColor[Color]; + + // If two PHIs have the same operand from every shared predecessor, then + // they don't actually interfere. Otherwise, isolate the current PHI. This + // could possibly be improved, e.g. we could isolate the PHI with the + // fewest operands. + if (CurrentPHI.first && CurrentPHI.second != PredOperandReg) + isolatePHI(PHI); + else + CurrentPHIForColor[Color] = std::make_pair(PHI, PredOperandReg); + } + } } void StrongPHIElimination::InsertCopiesForPHI(MachineInstr* PHI, MachineBasicBlock* MBB) { assert(PHI->isPHI()); + unsigned PHIColor = getPHIColor(PHI); + + for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) { + MachineOperand& SrcMO = PHI->getOperand(i); + + // If a source is defined by an implicit def, there is no need to insert a + // copy in the predecessor. + if (SrcMO.isUndef()) + continue; + + unsigned SrcReg = SrcMO.getReg(); + assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && + "Machine PHI Operands must all be virtual registers!"); + + MachineBasicBlock* PredBB = PHI->getOperand(i + 1).getMBB(); + unsigned SrcColor = getRegColor(SrcReg); + + // If neither the PHI nor the operand were isolated, then we only need to + // set the phi-kill flag on the VNInfo at this PHI. + if (PHIColor && SrcColor == PHIColor) { + LiveInterval& SrcInterval = LI->getInterval(SrcReg); + SlotIndex PredIndex = LI->getMBBEndIdx(PredBB); + VNInfo* SrcVNI = SrcInterval.getVNInfoAt(PredIndex.getPrevIndex()); + assert(SrcVNI); + SrcVNI->setHasPHIKill(true); + continue; + } + + unsigned CopyReg = 0; + if (PHIColor) { + SrcCopyMap::const_iterator I + = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor)); + CopyReg + = I != InsertedSrcCopyMap.end() ? I->second->getOperand(0).getReg() : 0; + } + + if (!CopyReg) { + const TargetRegisterClass* RC = MRI->getRegClass(SrcReg); + CopyReg = MRI->createVirtualRegister(RC); + + MachineBasicBlock::iterator + CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg); + unsigned SrcSubReg = SrcMO.getSubReg(); + MachineInstr* CopyInstr = BuildMI(*PredBB, + CopyInsertPoint, + PHI->getDebugLoc(), + TII->get(TargetOpcode::COPY), + CopyReg).addReg(SrcReg, 0, SrcSubReg); + LI->InsertMachineInstrInMaps(CopyInstr); + + // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for + // the newly added range. + LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr); + InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg)); + + addReg(CopyReg); + if (PHIColor) { + unionRegs(PHIColor, CopyReg); + assert(getRegColor(CopyReg) != CopyReg); + } else { + PHIColor = CopyReg; + assert(getRegColor(CopyReg) == CopyReg); + } + + if (!InsertedSrcCopyMap.count(std::make_pair(PredBB, PHIColor))) + InsertedSrcCopyMap[std::make_pair(PredBB, PHIColor)] = CopyInstr; + } + + SrcMO.setReg(CopyReg); + + // If SrcReg is not live beyond the PHI, trim its interval so that it is no + // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are + // processed later, but this is still correct to do at this point because we + // never rely on LiveIntervals being correct while inserting copies. + // FIXME: Should this just count uses at PHIs like the normal PHIElimination + // pass does? + LiveInterval& SrcLI = LI->getInterval(SrcReg); + SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); + SlotIndex PHIIndex = LI->getInstructionIndex(PHI); + SlotIndex NextInstrIndex = PHIIndex.getNextIndex(); + if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex)) + SrcLI.removeRange(MBBStartIndex, PHIIndex, true); + } + unsigned DestReg = PHI->getOperand(0).getReg(); + unsigned DestColor = getRegColor(DestReg); + + if (PHIColor && DestColor == PHIColor) { + LiveInterval& DestLI = LI->getInterval(DestReg); + + // Set the phi-def flag for the VN at this PHI. + SlotIndex PHIIndex = LI->getInstructionIndex(PHI); + VNInfo* DestVNI = DestLI.getVNInfoAt(PHIIndex.getDefIndex()); + assert(DestVNI); + DestVNI->setIsPHIDef(true); + + // Prior to PHI elimination, the live ranges of PHIs begin at their defining + // instruction. After PHI elimination, PHI instructions are replaced by VNs + // with the phi-def flag set, and the live ranges of these VNs start at the + // beginning of the basic block. + SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); + DestVNI->def = MBBStartIndex; + DestLI.addRange(LiveRange(MBBStartIndex, + PHIIndex.getDefIndex(), + DestVNI)); + return; + } const TargetRegisterClass* RC = MRI->getRegClass(DestReg); unsigned CopyReg = MRI->createVirtualRegister(RC); @@ -161,7 +705,7 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr* PHI, TII->get(TargetOpcode::COPY), DestReg).addReg(CopyReg); LI->InsertMachineInstrInMaps(CopyInstr); - CopyInstr->getOperand(1).setIsKill(true); + PHI->getOperand(0).setReg(CopyReg); // Add the region from the beginning of MBB to the copy instruction to // CopyReg's live interval, and give the VNInfo the phidef flag. @@ -186,53 +730,35 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr* PHI, assert(DestVNI); DestVNI->def = DestCopyIndex.getDefIndex(); - SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto; - for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) { - MachineOperand& SrcMO = PHI->getOperand(i); - - // If a source is defined by an implicit def, there is no need to insert a - // copy in the predecessor. - if (SrcMO.isUndef()) - continue; - - unsigned SrcReg = SrcMO.getReg(); - unsigned SrcSubReg = SrcMO.getSubReg(); - - assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && - "Machine PHI Operands must all be virtual registers!"); + InsertedDestCopies[CopyReg] = CopyInstr; +} - MachineBasicBlock* PredBB = PHI->getOperand(i + 1).getMBB(); +void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) { + if (Reg == NewReg) + return; - // A copy may have already been inserted in the predecessor in the case of a - // block with multiple incoming edges. - if (!MBBsInsertedInto.insert(PredBB)) - continue; + LiveInterval& OldLI = LI->getInterval(Reg); + LiveInterval& NewLI = LI->getInterval(NewReg); - MachineBasicBlock::iterator - CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg); - MachineInstr* CopyInstr = BuildMI(*PredBB, - CopyInsertPoint, - PHI->getDebugLoc(), - TII->get(TargetOpcode::COPY), - CopyReg).addReg(SrcReg, 0, SrcSubReg); - LI->InsertMachineInstrInMaps(CopyInstr); + // Merge the live ranges of the two registers. + DenseMap<VNInfo*, VNInfo*> VNMap; + for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end(); + LRI != LRE; ++LRI) { + LiveRange OldLR = *LRI; + VNInfo* OldVN = OldLR.valno; - // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for - // the newly added range. - LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr); - InsertedCopies.insert(std::make_pair(PredBB, SrcReg)); + VNInfo*& NewVN = VNMap[OldVN]; + if (!NewVN) { + NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator()); + VNMap[OldVN] = NewVN; + } - // If SrcReg is not live beyond the PHI, trim its interval so that it is no - // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are - // processed later, but this is still correct to do at this point because we - // never rely on LiveIntervals being correct while inserting copies. - // FIXME: Should this just count uses at PHIs like the normal PHIElimination - // pass does? - LiveInterval& SrcLI = LI->getInterval(SrcReg); - SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); - SlotIndex PHIIndex = LI->getInstructionIndex(PHI); - SlotIndex NextInstrIndex = PHIIndex.getNextIndex(); - if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex)) - SrcLI.removeRange(MBBStartIndex, PHIIndex, true); + LiveRange LR(OldLR.start, OldLR.end, NewVN); + NewLI.addRange(LR); } + + // Remove the LiveInterval for the register being renamed and replace all + // of its defs and uses with the new register. + LI->removeInterval(Reg); + MRI->replaceRegWith(Reg, NewReg); } |