diff options
-rw-r--r-- | lib/CodeGen/PrologEpilogInserter.cpp | 65 | ||||
-rw-r--r-- | lib/CodeGen/PrologEpilogInserter.h | 1 | ||||
-rw-r--r-- | lib/Target/ARM/ARMBaseRegisterInfo.cpp | 8 | ||||
-rw-r--r-- | lib/Target/ARM/Thumb1RegisterInfo.cpp | 136 |
4 files changed, 151 insertions, 59 deletions
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index b568ad828b..dd39853167 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -31,7 +31,9 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" +#include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/STLExtras.h" #include <climits> @@ -42,6 +44,16 @@ char PEI::ID = 0; static RegisterPass<PEI> X("prologepilog", "Prologue/Epilogue Insertion"); +// FIXME: For now, the frame index scavenging is off by default and only +// used by the Thumb1 target. When it's the default and replaces the current +// on-the-fly PEI scavenging for all targets, requiresRegisterScavenging() +// will replace this. +cl::opt<bool> +FrameIndexVirtualScavenging("enable-frame-index-scavenging", + cl::Hidden, + cl::desc("Enable frame index elimination with" + "virtual register scavenging")); + /// createPrologEpilogCodeInserter - This function returns a pass that inserts /// prolog and epilog code, and eliminates abstract frame references. /// @@ -104,6 +116,12 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // replaceFrameIndices(Fn); + // If register scavenging is needed, as we've enabled doing it as a + // post-pass, scavenge the virtual registers that frame index elimination + // inserted. + if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) + scavengeFrameVirtualRegs(Fn); + delete RS; clearAllSets(); return true; @@ -634,7 +652,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { int SPAdj = 0; // SP offset due to call frame setup / destroy. 
- if (RS) RS->enterBasicBlock(BB); + if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { @@ -680,7 +698,8 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { // use that target machine register info object to eliminate // it. - TRI.eliminateFrameIndex(MI, SPAdj, RS); + TRI.eliminateFrameIndex(MI, SPAdj, FrameIndexVirtualScavenging ? + NULL : RS); // Reset the iterator if we were at the beginning of the BB. if (AtBeginning) { @@ -695,10 +714,50 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { if (DoIncr && I != BB->end()) ++I; // Update register states. - if (RS && MI) RS->forward(MI); + if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); } assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?"); } } +void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { + const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + + // Run through the instructions and find any virtual registers. + for (MachineFunction::iterator BB = Fn.begin(), + E = Fn.end(); BB != E; ++BB) { + RS->enterBasicBlock(BB); + + // Keep a map of which scratch reg we use for each virtual reg. + // FIXME: Is a map like this the best solution? Seems like overkill, + // but to get rid of it would need some fairly strong assumptions + // that may not be valid as this gets smarter about reuse and such. 
+ IndexedMap<unsigned, VirtReg2IndexFunctor> ScratchRegForVirtReg; + ScratchRegForVirtReg.grow(Fn.getRegInfo().getLastVirtReg()); + + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + MachineInstr *MI = I; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) + if (MI->getOperand(i).isReg()) { + unsigned Reg = MI->getOperand(i).getReg(); + if (Reg && TRI->isVirtualRegister(Reg)) { + // If we already have a scratch for this virtual register, use it + unsigned NewReg = ScratchRegForVirtReg[Reg]; + if (!NewReg) { + const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); + NewReg = RS->FindUnusedReg(RC); + if (NewReg == 0) + // No register is "free". Scavenge a register. + // FIXME: Track SPAdj. Zero won't always be right + NewReg = RS->scavengeRegister(RC, I, 0); + assert (NewReg && "unable to scavenge register!"); + ScratchRegForVirtReg[Reg] = NewReg; + } + MI->getOperand(i).setReg(NewReg); + } + } + RS->forward(MI); + } + } +} diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h index b143554e80..d0a68e19a2 100644 --- a/lib/CodeGen/PrologEpilogInserter.h +++ b/lib/CodeGen/PrologEpilogInserter.h @@ -123,6 +123,7 @@ namespace llvm { void insertCSRSpillsAndRestores(MachineFunction &Fn); void calculateFrameObjectOffsets(MachineFunction &Fn); void replaceFrameIndices(MachineFunction &Fn); + void scavengeFrameVirtualRegs(MachineFunction &Fn); void insertPrologEpilogCode(MachineFunction &Fn); // Initialize DFA sets, called before iterations. diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index e591bbab8b..648f97fb1b 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -649,10 +649,12 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Estimate if we might need to scavenge a register at some point in order // to materialize a stack offset. 
If so, either spill one additional // callee-saved register or reserve a special spill slot to facilitate - // register scavenging. - if (RS && !ExtraCSSpill && !AFI->isThumb1OnlyFunction()) { + // register scavenging. Thumb1 needs a spill slot for stack pointer + // adjustments also, even when the frame itself is small. + if (RS && !ExtraCSSpill) { MachineFrameInfo *MFI = MF.getFrameInfo(); - if (estimateStackSize(MF, MFI) >= estimateRSStackSizeLimit(MF)) { + if (estimateStackSize(MF, MFI) >= estimateRSStackSizeLimit(MF) + || AFI->isThumb1OnlyFunction()) { // If any non-reserved CS register isn't spilled, just spill one or two // extra. That should take care of it! unsigned NumExtras = TargetAlign / 4; diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 258912be79..ea5f0727a1 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -37,10 +37,10 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -static cl::opt<bool> -ThumbRegScavenging("enable-thumb-reg-scavenging", - cl::Hidden, - cl::desc("Enable register scavenging on Thumb")); +// FIXME: This cmd line option conditionalizes the new register scavenging +// implementation in PEI. Remove the option when scavenging works well enough +// to be the default. 
+extern cl::opt<bool> FrameIndexVirtualScavenging; Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) @@ -84,7 +84,7 @@ Thumb1RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, EVT VT) const { bool Thumb1RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { - return ThumbRegScavenging; + return FrameIndexVirtualScavenging; } bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { @@ -113,6 +113,7 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, const TargetInstrInfo &TII, const Thumb1RegisterInfo& MRI, DebugLoc dl) { + MachineFunction &MF = *MBB.getParent(); bool isHigh = !isARMLowRegister(DestReg) || (BaseReg != 0 && !isARMLowRegister(BaseReg)); bool isSub = false; @@ -127,9 +128,13 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, unsigned LdReg = DestReg; if (DestReg == ARM::SP) { assert(BaseReg == ARM::SP && "Unexpected!"); - LdReg = ARM::R3; - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) - .addReg(ARM::R3, RegState::Kill); + if (FrameIndexVirtualScavenging) { + LdReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); + } else { + LdReg = ARM::R3; + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) + .addReg(ARM::R3, RegState::Kill); + } } if (NumBytes <= 255 && NumBytes >= 0) @@ -155,7 +160,7 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill); AddDefaultPred(MIB); - if (DestReg == ARM::SP) + if (!FrameIndexVirtualScavenging && DestReg == ARM::SP) BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R3) .addReg(ARM::R12, RegState::Kill); } @@ -602,50 +607,73 @@ void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, else // tLDR has an extra register operand. MI.addOperand(MachineOperand::CreateReg(0, false)); } else if (Desc.mayStore()) { - // FIXME! This is horrific!!! We need register scavenging. 
- // Our temporary workaround has marked r3 unavailable. Of course, r3 is - // also a ABI register so it's possible that is is the register that is - // being storing here. If that's the case, we do the following: - // r12 = r2 - // Use r2 to materialize sp + offset - // str r3, r2 - // r2 = r12 - unsigned ValReg = MI.getOperand(0).getReg(); - unsigned TmpReg = ARM::R3; - bool UseRR = false; - if (ValReg == ARM::R3) { - BuildMI(MBB, II, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) - .addReg(ARM::R2, RegState::Kill); - TmpReg = ARM::R2; - } - if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) - BuildMI(MBB, II, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) - .addReg(ARM::R3, RegState::Kill); - if (Opcode == ARM::tSpill) { - if (FrameReg == ARM::SP) - emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, - Offset, false, TII, *this, dl); - else { - emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset); - UseRR = true; + if (FrameIndexVirtualScavenging) { + unsigned TmpReg = + MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); + bool UseRR = false; + if (Opcode == ARM::tSpill) { + if (FrameReg == ARM::SP) + emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, + Offset, false, TII, *this, dl); + else { + emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset); + UseRR = true; + } + } else + emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, + *this, dl); + MI.setDesc(TII.get(ARM::tSTR)); + MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); + if (UseRR) // Use [reg, reg] addrmode. + MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); + else // tSTR has an extra register operand. + MI.addOperand(MachineOperand::CreateReg(0, false)); + } else { + // FIXME! This is horrific!!! We need register scavenging. + // Our temporary workaround has marked r3 unavailable. Of course, r3 is + // also a ABI register so it's possible that is is the register that is + // being storing here. 
If that's the case, we do the following: + // r12 = r2 + // Use r2 to materialize sp + offset + // str r3, r2 + // r2 = r12 + unsigned ValReg = MI.getOperand(0).getReg(); + unsigned TmpReg = ARM::R3; + bool UseRR = false; + if (ValReg == ARM::R3) { + BuildMI(MBB, II, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) + .addReg(ARM::R2, RegState::Kill); + TmpReg = ARM::R2; } - } else - emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, - *this, dl); - MI.setDesc(TII.get(ARM::tSTR)); - MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); - if (UseRR) // Use [reg, reg] addrmode. - MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); - else // tSTR has an extra register operand. - MI.addOperand(MachineOperand::CreateReg(0, false)); + if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) + BuildMI(MBB, II, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) + .addReg(ARM::R3, RegState::Kill); + if (Opcode == ARM::tSpill) { + if (FrameReg == ARM::SP) + emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, + Offset, false, TII, *this, dl); + else { + emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset); + UseRR = true; + } + } else + emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, + *this, dl); + MI.setDesc(TII.get(ARM::tSTR)); + MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); + if (UseRR) // Use [reg, reg] addrmode. + MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); + else // tSTR has an extra register operand. 
+ MI.addOperand(MachineOperand::CreateReg(0, false)); - MachineBasicBlock::iterator NII = next(II); - if (ValReg == ARM::R3) - BuildMI(MBB, NII, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R2) - .addReg(ARM::R12, RegState::Kill); - if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) - BuildMI(MBB, NII, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R3) - .addReg(ARM::R12, RegState::Kill); + MachineBasicBlock::iterator NII = next(II); + if (ValReg == ARM::R3) + BuildMI(MBB, NII, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R2) + .addReg(ARM::R12, RegState::Kill); + if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) + BuildMI(MBB, NII, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R3) + .addReg(ARM::R12, RegState::Kill); + } } else assert(false && "Unexpected opcode!"); @@ -834,11 +862,13 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, if (VARegSaveSize) { // Epilogue for vararg functions: pop LR to R3 and branch off it. // FIXME: Verify this is still ok when R3 is no longer being reserved. - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))).addReg(ARM::R3); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) + .addReg(ARM::R3, RegState::Define); emitSPUpdate(MBB, MBBI, TII, dl, *this, VARegSaveSize); - BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)).addReg(ARM::R3); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) + .addReg(ARM::R3, RegState::Kill); MBB.erase(MBBI); } } |