diff options
author | Anton Korobeynikov <asl@math.spbu.ru> | 2010-11-27 23:05:25 +0000 |
---|---|---|
committer | Anton Korobeynikov <asl@math.spbu.ru> | 2010-11-27 23:05:25 +0000 |
commit | 94c5ae08750f314bc3cf1bf882b686244a3927d9 (patch) | |
tree | 2321c65c1ab4eb60729a6a210ebceb4f4508ceae | |
parent | cd775ceff0b25a0b026f643a7990c2924bd310a3 (diff) |
Move more PEI-related hooks to TFI
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@120229 91177308-0d34-0410-b5e6-96231b3b80d8
31 files changed, 796 insertions, 793 deletions
diff --git a/include/llvm/Target/TargetFrameInfo.h b/include/llvm/Target/TargetFrameInfo.h index ebaf221aad..3e502e7d74 100644 --- a/include/llvm/Target/TargetFrameInfo.h +++ b/include/llvm/Target/TargetFrameInfo.h @@ -24,6 +24,7 @@ namespace llvm { class MachineFunction; class MachineBasicBlock; class MachineMove; + class RegScavenger; /// Information about stack frame layout on the target. It holds the direction /// of stack growth, the known stack alignment on entry to each function, and @@ -172,6 +173,22 @@ public: /// returned directly, and the base register is returned via FrameReg. virtual int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const; + + /// processFunctionBeforeCalleeSavedScan - This method is called immediately + /// before PrologEpilogInserter scans the physical registers used to determine + /// what callee saved registers should be spilled. This method is optional. + virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS = NULL) const { + + } + + /// processFunctionBeforeFrameFinalized - This method is called immediately + /// before the specified function's frame layout (MF.getFrameInfo()) is + /// finalized. Once the frame is finalized, MO_FrameIndex operands are + /// replaced with direct constants. This method is optional. + /// + virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF) const { + } }; } // End llvm namespace diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index 572bbfb7dd..9caedcb506 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -678,21 +678,6 @@ public: assert(0 && "Call Frame Pseudo Instructions do not exist on this target!"); } - /// processFunctionBeforeCalleeSavedScan - This method is called immediately - /// before PrologEpilogInserter scans the physical registers used to determine - /// what callee saved registers should be spilled. This method is optional. - virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = NULL) const { - - } - - /// processFunctionBeforeFrameFinalized - This method is called immediately - /// before the specified function's frame layout (MF.getFrameInfo()) is - /// finalized. Once the frame is finalized, MO_FrameIndex operands are - /// replaced with direct constants. This method is optional. - /// - virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF) const { - } /// saveScavengerRegister - Spill the register so it can be used by the /// register scavenger. Return true if the register was spilled, false diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index c461623192..5ba7086eef 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -65,6 +65,8 @@ FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); } bool PEI::runOnMachineFunction(MachineFunction &Fn) { const Function* F = Fn.getFunction(); const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo(); + RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL; FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); @@ -75,7 +77,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // Allow the target machine to make some adjustments to the function // e.g. UsedPhysRegs before calculateCalleeSavedRegisters. - TRI->processFunctionBeforeCalleeSavedScan(Fn, RS); + TFI->processFunctionBeforeCalleeSavedScan(Fn, RS); // Scan the function for modified callee saved registers and insert spill code // for any callee saved registers that are modified. @@ -95,7 +97,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // Allow the target machine to make final modifications to the function // before the frame layout is finalized. - TRI->processFunctionBeforeFrameFinalized(Fn); + TFI->processFunctionBeforeFrameFinalized(Fn); // Calculate actual frame offsets for all abstract stack objects... calculateFrameObjectOffsets(Fn); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 42159c9f4f..3e7c1c3075 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -591,328 +591,6 @@ cannotEliminateFrame(const MachineFunction &MF) const { || needsStackRealignment(MF); } -/// estimateStackSize - Estimate and return the size of the frame. -static unsigned estimateStackSize(MachineFunction &MF) { - const MachineFrameInfo *FFI = MF.getFrameInfo(); - int Offset = 0; - for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -FFI->getObjectOffset(i); - if (FixedOff > Offset) Offset = FixedOff; - } - for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { - if (FFI->isDeadObjectIndex(i)) - continue; - Offset += FFI->getObjectSize(i); - unsigned Align = FFI->getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; - } - return (unsigned)Offset; -} - -/// estimateRSStackSizeLimit - Look at each instruction that references stack -/// frames and return the stack size limit beyond which some of these -/// instructions will require a scratch register during their expansion later. -unsigned -ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const { - const TargetFrameInfo *TFI = MF.getTarget().getFrameInfo(); - const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - unsigned Limit = (1 << 12) - 1; - for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) { - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); - I != E; ++I) { - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - if (!I->getOperand(i).isFI()) continue; - - // When using ADDri to get the address of a stack object, 255 is the - // largest offset guaranteed to fit in the immediate offset. - if (I->getOpcode() == ARM::ADDri) { - Limit = std::min(Limit, (1U << 8) - 1); - break; - } - - // Otherwise check the addressing mode. - switch (I->getDesc().TSFlags & ARMII::AddrModeMask) { - case ARMII::AddrMode3: - case ARMII::AddrModeT2_i8: - Limit = std::min(Limit, (1U << 8) - 1); - break; - case ARMII::AddrMode5: - case ARMII::AddrModeT2_i8s4: - Limit = std::min(Limit, ((1U << 8) - 1) * 4); - break; - case ARMII::AddrModeT2_i12: - // i12 supports only positive offset so these will be converted to - // i8 opcodes. See llvm::rewriteT2FrameIndex. - if (TFI->hasFP(MF) && AFI->hasStackFrame()) - Limit = std::min(Limit, (1U << 8) - 1); - break; - case ARMII::AddrMode4: - case ARMII::AddrMode6: - // Addressing modes 4 & 6 (load/store) instructions can't encode an - // immediate offset for stack references. - return 0; - default: - break; - } - break; // At most one FI per instruction - } - } - } - - return Limit; -} - -static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, - const ARMBaseInstrInfo &TII) { - unsigned FnSize = 0; - for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end(); - MBBI != E; ++MBBI) { - const MachineBasicBlock &MBB = *MBBI; - for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end(); - I != E; ++I) - FnSize += TII.GetInstSizeInBytes(I); - } - return FnSize; -} - -void -ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { - // This tells PEI to spill the FP as if it is any other callee-save register - // to take advantage the eliminateFrameIndex machinery. This also ensures it - // is spilled in the order specified by getCalleeSavedRegs() to make it easier - // to combine multiple loads / stores. - bool CanEliminateFrame = true; - bool CS1Spilled = false; - bool LRSpilled = false; - unsigned NumGPRSpills = 0; - SmallVector<unsigned, 4> UnspilledCS1GPRs; - SmallVector<unsigned, 4> UnspilledCS2GPRs; - const TargetFrameInfo *TFI = MF.getTarget().getFrameInfo(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - - // Spill R4 if Thumb2 function requires stack realignment - it will be used as - // scratch register. Also spill R4 if Thumb2 function has varsized objects, - // since it's always posible to restore sp from fp in a single instruction. - // FIXME: It will be better just to find spare register here. - if (AFI->isThumb2Function() && - (MFI->hasVarSizedObjects() || needsStackRealignment(MF))) - MF.getRegInfo().setPhysRegUsed(ARM::R4); - - // Spill LR if Thumb1 function uses variable length argument lists. - if (AFI->isThumb1OnlyFunction() && AFI->getVarArgsRegSaveSize() > 0) - MF.getRegInfo().setPhysRegUsed(ARM::LR); - - // Spill the BasePtr if it's used. - if (hasBasePointer(MF)) - MF.getRegInfo().setPhysRegUsed(BasePtr); - - // Don't spill FP if the frame can be eliminated. This is determined - // by scanning the callee-save registers to see if any is used. - const unsigned *CSRegs = getCalleeSavedRegs(); - for (unsigned i = 0; CSRegs[i]; ++i) { - unsigned Reg = CSRegs[i]; - bool Spilled = false; - if (MF.getRegInfo().isPhysRegUsed(Reg)) { - AFI->setCSRegisterIsSpilled(Reg); - Spilled = true; - CanEliminateFrame = false; - } else { - // Check alias registers too. - for (const unsigned *Aliases = getAliasSet(Reg); *Aliases; ++Aliases) { - if (MF.getRegInfo().isPhysRegUsed(*Aliases)) { - Spilled = true; - CanEliminateFrame = false; - } - } - } - - if (!ARM::GPRRegisterClass->contains(Reg)) - continue; - - if (Spilled) { - NumGPRSpills++; - - if (!STI.isTargetDarwin()) { - if (Reg == ARM::LR) - LRSpilled = true; - CS1Spilled = true; - continue; - } - - // Keep track if LR and any of R4, R5, R6, and R7 is spilled. - switch (Reg) { - case ARM::LR: - LRSpilled = true; - // Fallthrough - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - CS1Spilled = true; - break; - default: - break; - } - } else { - if (!STI.isTargetDarwin()) { - UnspilledCS1GPRs.push_back(Reg); - continue; - } - - switch (Reg) { - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - case ARM::LR: - UnspilledCS1GPRs.push_back(Reg); - break; - default: - UnspilledCS2GPRs.push_back(Reg); - break; - } - } - } - - bool ForceLRSpill = false; - if (!LRSpilled && AFI->isThumb1OnlyFunction()) { - unsigned FnSize = GetFunctionSizeInBytes(MF, TII); - // Force LR to be spilled if the Thumb function size is > 2048. This enables - // use of BL to implement far jump. If it turns out that it's not needed - // then the branch fix up path will undo it. - if (FnSize >= (1 << 11)) { - CanEliminateFrame = false; - ForceLRSpill = true; - } - } - - // If any of the stack slot references may be out of range of an immediate - // offset, make sure a register (or a spill slot) is available for the - // register scavenger. Note that if we're indexing off the frame pointer, the - // effective stack size is 4 bytes larger since the FP points to the stack - // slot of the previous FP. Also, if we have variable sized objects in the - // function, stack slot references will often be negative, and some of - // our instructions are positive-offset only, so conservatively consider - // that case to want a spill slot (or register) as well. Similarly, if - // the function adjusts the stack pointer during execution and the - // adjustments aren't already part of our stack size estimate, our offset - // calculations may be off, so be conservative. - // FIXME: We could add logic to be more precise about negative offsets - // and which instructions will need a scratch register for them. Is it - // worth the effort and added fragility? - bool BigStack = - (RS && - (estimateStackSize(MF) + ((TFI->hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= - estimateRSStackSizeLimit(MF))) - || MFI->hasVarSizedObjects() - || (MFI->adjustsStack() && !TFI->canSimplifyCallFramePseudos(MF)); - - bool ExtraCSSpill = false; - if (BigStack || !CanEliminateFrame || cannotEliminateFrame(MF)) { - AFI->setHasStackFrame(true); - - // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. - // Spill LR as well so we can fold BX_RET to the registers restore (LDM). - if (!LRSpilled && CS1Spilled) { - MF.getRegInfo().setPhysRegUsed(ARM::LR); - AFI->setCSRegisterIsSpilled(ARM::LR); - NumGPRSpills++; - UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(), - UnspilledCS1GPRs.end(), (unsigned)ARM::LR)); - ForceLRSpill = false; - ExtraCSSpill = true; - } - - if (TFI->hasFP(MF)) { - MF.getRegInfo().setPhysRegUsed(FramePtr); - NumGPRSpills++; - } - - // If stack and double are 8-byte aligned and we are spilling an odd number - // of GPRs, spill one extra callee save GPR so we won't have to pad between - // the integer and double callee save areas. - unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); - if (TargetAlign == 8 && (NumGPRSpills & 1)) { - if (CS1Spilled && !UnspilledCS1GPRs.empty()) { - for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) { - unsigned Reg = UnspilledCS1GPRs[i]; - // Don't spill high register if the function is thumb1 - if (!AFI->isThumb1OnlyFunction() || - isARMLowRegister(Reg) || Reg == ARM::LR) { - MF.getRegInfo().setPhysRegUsed(Reg); - AFI->setCSRegisterIsSpilled(Reg); - if (!isReservedReg(MF, Reg)) - ExtraCSSpill = true; - break; - } - } - } else if (!UnspilledCS2GPRs.empty() && - !AFI->isThumb1OnlyFunction()) { - unsigned Reg = UnspilledCS2GPRs.front(); - MF.getRegInfo().setPhysRegUsed(Reg); - AFI->setCSRegisterIsSpilled(Reg); - if (!isReservedReg(MF, Reg)) - ExtraCSSpill = true; - } - } - - // Estimate if we might need to scavenge a register at some point in order - // to materialize a stack offset. If so, either spill one additional - // callee-saved register or reserve a special spill slot to facilitate - // register scavenging. Thumb1 needs a spill slot for stack pointer - // adjustments also, even when the frame itself is small. - if (BigStack && !ExtraCSSpill) { - // If any non-reserved CS register isn't spilled, just spill one or two - // extra. That should take care of it! - unsigned NumExtras = TargetAlign / 4; - SmallVector<unsigned, 2> Extras; - while (NumExtras && !UnspilledCS1GPRs.empty()) { - unsigned Reg = UnspilledCS1GPRs.back(); - UnspilledCS1GPRs.pop_back(); - if (!isReservedReg(MF, Reg) && - (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) || - Reg == ARM::LR)) { - Extras.push_back(Reg); - NumExtras--; - } - } - // For non-Thumb1 functions, also check for hi-reg CS registers - if (!AFI->isThumb1OnlyFunction()) { - while (NumExtras && !UnspilledCS2GPRs.empty()) { - unsigned Reg = UnspilledCS2GPRs.back(); - UnspilledCS2GPRs.pop_back(); - if (!isReservedReg(MF, Reg)) { - Extras.push_back(Reg); - NumExtras--; - } - } - } - if (Extras.size() && NumExtras == 0) { - for (unsigned i = 0, e = Extras.size(); i != e; ++i) { - MF.getRegInfo().setPhysRegUsed(Extras[i]); - AFI->setCSRegisterIsSpilled(Extras[i]); - } - } else if (!AFI->isThumb1OnlyFunction()) { - // note: Thumb1 functions spill to R12, not the stack. Reserve a slot - // closest to SP or frame pointer. - const TargetRegisterClass *RC = ARM::GPRRegisterClass; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); - } - } - } - - if (ForceLRSpill) { - MF.getRegInfo().setPhysRegUsed(ARM::LR); - AFI->setCSRegisterIsSpilled(ARM::LR); - AFI->setLRIsSpilledForFarJump(true); - } -} - unsigned ARMBaseRegisterInfo::getRARegister() const { return ARM::LR; } diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 718f484296..23a466fad9 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -154,9 +154,6 @@ public: bool cannotEliminateFrame(const MachineFunction &MF) const; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = NULL) const; - // Debug information queries. unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; @@ -198,8 +195,6 @@ public: int SPAdj, RegScavenger *RS = NULL) const; private: - unsigned estimateRSStackSizeLimit(MachineFunction &MF) const; - unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const; unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const; diff --git a/lib/Target/ARM/ARMFrameInfo.cpp b/lib/Target/ARM/ARMFrameInfo.cpp index d379bf0e9c..e2531d04d0 100644 --- a/lib/Target/ARM/ARMFrameInfo.cpp +++ b/lib/Target/ARM/ARMFrameInfo.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -614,3 +615,327 @@ bool ARMFrameInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } + +// FIXME: Make generic? +static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, + const ARMBaseInstrInfo &TII) { + unsigned FnSize = 0; + for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end(); + MBBI != E; ++MBBI) { + const MachineBasicBlock &MBB = *MBBI; + for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end(); + I != E; ++I) + FnSize += TII.GetInstSizeInBytes(I); + } + return FnSize; +} + +/// estimateStackSize - Estimate and return the size of the frame. +/// FIXME: Make generic? +static unsigned estimateStackSize(MachineFunction &MF) { + const MachineFrameInfo *FFI = MF.getFrameInfo(); + int Offset = 0; + for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { + int FixedOff = -FFI->getObjectOffset(i); + if (FixedOff > Offset) Offset = FixedOff; + } + for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { + if (FFI->isDeadObjectIndex(i)) + continue; + Offset += FFI->getObjectSize(i); + unsigned Align = FFI->getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + } + return (unsigned)Offset; +} + +/// estimateRSStackSizeLimit - Look at each instruction that references stack +/// frames and return the stack size limit beyond which some of these +/// instructions will require a scratch register during their expansion later. +// FIXME: Move to TII? +static unsigned estimateRSStackSizeLimit(MachineFunction &MF, + const TargetFrameInfo *TFI) { + const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned Limit = (1 << 12) - 1; + for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) { + for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + if (!I->getOperand(i).isFI()) continue; + + // When using ADDri to get the address of a stack object, 255 is the + // largest offset guaranteed to fit in the immediate offset. + if (I->getOpcode() == ARM::ADDri) { + Limit = std::min(Limit, (1U << 8) - 1); + break; + } + + // Otherwise check the addressing mode. + switch (I->getDesc().TSFlags & ARMII::AddrModeMask) { + case ARMII::AddrMode3: + case ARMII::AddrModeT2_i8: + Limit = std::min(Limit, (1U << 8) - 1); + break; + case ARMII::AddrMode5: + case ARMII::AddrModeT2_i8s4: + Limit = std::min(Limit, ((1U << 8) - 1) * 4); + break; + case ARMII::AddrModeT2_i12: + // i12 supports only positive offset so these will be converted to + // i8 opcodes. See llvm::rewriteT2FrameIndex. + if (TFI->hasFP(MF) && AFI->hasStackFrame()) + Limit = std::min(Limit, (1U << 8) - 1); + break; + case ARMII::AddrMode4: + case ARMII::AddrMode6: + // Addressing modes 4 & 6 (load/store) instructions can't encode an + // immediate offset for stack references. + return 0; + default: + break; + } + break; // At most one FI per instruction + } + } + } + + return Limit; +} + +void +ARMFrameInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { + // This tells PEI to spill the FP as if it is any other callee-save register + // to take advantage the eliminateFrameIndex machinery. This also ensures it + // is spilled in the order specified by getCalleeSavedRegs() to make it easier + // to combine multiple loads / stores. + bool CanEliminateFrame = true; + bool CS1Spilled = false; + bool LRSpilled = false; + unsigned NumGPRSpills = 0; + SmallVector<unsigned, 4> UnspilledCS1GPRs; + SmallVector<unsigned, 4> UnspilledCS2GPRs; + const ARMBaseRegisterInfo *RegInfo = + static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const ARMBaseInstrInfo &TII = + *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned FramePtr = RegInfo->getFrameRegister(MF); + + // Spill R4 if Thumb2 function requires stack realignment - it will be used as + // scratch register. Also spill R4 if Thumb2 function has varsized objects, + // since it's always posible to restore sp from fp in a single instruction. + // FIXME: It will be better just to find spare register here. + if (AFI->isThumb2Function() && + (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))) + MF.getRegInfo().setPhysRegUsed(ARM::R4); + + // Spill LR if Thumb1 function uses variable length argument lists. + if (AFI->isThumb1OnlyFunction() && AFI->getVarArgsRegSaveSize() > 0) + MF.getRegInfo().setPhysRegUsed(ARM::LR); + + // Spill the BasePtr if it's used. + if (RegInfo->hasBasePointer(MF)) + MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister()); + + // Don't spill FP if the frame can be eliminated. This is determined + // by scanning the callee-save registers to see if any is used. + const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(); + for (unsigned i = 0; CSRegs[i]; ++i) { + unsigned Reg = CSRegs[i]; + bool Spilled = false; + if (MF.getRegInfo().isPhysRegUsed(Reg)) { + AFI->setCSRegisterIsSpilled(Reg); + Spilled = true; + CanEliminateFrame = false; + } else { + // Check alias registers too. + for (const unsigned *Aliases = + RegInfo->getAliasSet(Reg); *Aliases; ++Aliases) { + if (MF.getRegInfo().isPhysRegUsed(*Aliases)) { + Spilled = true; + CanEliminateFrame = false; + } + } + } + + if (!ARM::GPRRegisterClass->contains(Reg)) + continue; + + if (Spilled) { + NumGPRSpills++; + + if (!STI.isTargetDarwin()) { + if (Reg == ARM::LR) + LRSpilled = true; + CS1Spilled = true; + continue; + } + + // Keep track if LR and any of R4, R5, R6, and R7 is spilled. + switch (Reg) { + case ARM::LR: + LRSpilled = true; + // Fallthrough + case ARM::R4: case ARM::R5: + case ARM::R6: case ARM::R7: + CS1Spilled = true; + break; + default: + break; + } + } else { + if (!STI.isTargetDarwin()) { + UnspilledCS1GPRs.push_back(Reg); + continue; + } + + switch (Reg) { + case ARM::R4: case ARM::R5: + case ARM::R6: case ARM::R7: + case ARM::LR: + UnspilledCS1GPRs.push_back(Reg); + break; + default: + UnspilledCS2GPRs.push_back(Reg); + break; + } + } + } + + bool ForceLRSpill = false; + if (!LRSpilled && AFI->isThumb1OnlyFunction()) { + unsigned FnSize = GetFunctionSizeInBytes(MF, TII); + // Force LR to be spilled if the Thumb function size is > 2048. This enables + // use of BL to implement far jump. If it turns out that it's not needed + // then the branch fix up path will undo it. + if (FnSize >= (1 << 11)) { + CanEliminateFrame = false; + ForceLRSpill = true; + } + } + + // If any of the stack slot references may be out of range of an immediate + // offset, make sure a register (or a spill slot) is available for the + // register scavenger. Note that if we're indexing off the frame pointer, the + // effective stack size is 4 bytes larger since the FP points to the stack + // slot of the previous FP. Also, if we have variable sized objects in the + // function, stack slot references will often be negative, and some of + // our instructions are positive-offset only, so conservatively consider + // that case to want a spill slot (or register) as well. Similarly, if + // the function adjusts the stack pointer during execution and the + // adjustments aren't already part of our stack size estimate, our offset + // calculations may be off, so be conservative. + // FIXME: We could add logic to be more precise about negative offsets + // and which instructions will need a scratch register for them. Is it + // worth the effort and added fragility? + bool BigStack = + (RS && + (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= + estimateRSStackSizeLimit(MF, this))) + || MFI->hasVarSizedObjects() + || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF)); + + bool ExtraCSSpill = false; + if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { + AFI->setHasStackFrame(true); + + // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. + // Spill LR as well so we can fold BX_RET to the registers restore (LDM). + if (!LRSpilled && CS1Spilled) { + MF.getRegInfo().setPhysRegUsed(ARM::LR); + AFI->setCSRegisterIsSpilled(ARM::LR); + NumGPRSpills++; + UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(), + UnspilledCS1GPRs.end(), (unsigned)ARM::LR)); + ForceLRSpill = false; + ExtraCSSpill = true; + } + + if (hasFP(MF)) { + MF.getRegInfo().setPhysRegUsed(FramePtr); |