diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/PowerPC/PPC.h | 1 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPC32ISelSimple.cpp | 29 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCTargetMachine.cpp | 5 | ||||
-rw-r--r-- | lib/Target/PowerPC/PowerPC.td | 4 | ||||
-rw-r--r-- | lib/Target/PowerPC/PowerPCISelSimple.cpp | 29 | ||||
-rw-r--r-- | lib/Target/PowerPC/PowerPCPEI.cpp | 326 | ||||
-rw-r--r-- | lib/Target/PowerPC/PowerPCRegisterInfo.cpp | 108 | ||||
-rw-r--r-- | lib/Target/PowerPC/README.txt | 11 |
8 files changed, 405 insertions, 108 deletions
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index 002132873d..e63892ddb3 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -26,6 +26,7 @@ class TargetMachine; // passes. For example: FunctionPass *createPPCSimpleInstructionSelector(TargetMachine &TM); FunctionPass *createPPCCodePrinterPass(std::ostream &OS, TargetMachine &TM); +FunctionPass *createPowerPCPEI(); FunctionPass *createPPCBranchSelectionPass(); } // end namespace llvm; diff --git a/lib/Target/PowerPC/PPC32ISelSimple.cpp b/lib/Target/PowerPC/PPC32ISelSimple.cpp index 8da6b9d705..35272380d6 100644 --- a/lib/Target/PowerPC/PPC32ISelSimple.cpp +++ b/lib/Target/PowerPC/PPC32ISelSimple.cpp @@ -646,11 +646,8 @@ void ISel::copyConstantToRegister(MachineBasicBlock *MBB, /// LoadArgumentsToVirtualRegs - Load all of the arguments to this function from /// the stack into virtual registers. -/// -/// FIXME: When we can calculate which args are coming in via registers -/// source them from there instead. void ISel::LoadArgumentsToVirtualRegs(Function &Fn) { - unsigned ArgOffset = 20; // FIXME why is this not 24? + unsigned ArgOffset = 24; unsigned GPR_remaining = 8; unsigned FPR_remaining = 13; unsigned GPR_idx = 0, FPR_idx = 0; @@ -1412,8 +1409,10 @@ void ISel::visitBranchInst(BranchInst &BI) { /// <http://developer.apple.com/documentation/DeveloperTools/Conceptual/MachORuntime/2rt_powerpc_abi/chapter_9_section_5.html> void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI, const std::vector<ValueRecord> &Args, bool isVarArg) { - // Count how many bytes are to be pushed on the stack... - unsigned NumBytes = 0; + // Count how many bytes are to be pushed on the stack, including the linkage + // area, and parameter passing area. 
+ unsigned NumBytes = 24; + unsigned ArgOffset = 24; if (!Args.empty()) { for (unsigned i = 0, e = Args.size(); i != e; ++i) @@ -1430,12 +1429,16 @@ void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI, default: assert(0 && "Unknown class!"); } + // Just to be safe, we'll always reserve the full 32 bytes worth of + // argument passing space in case any called code gets funky on us. + if (NumBytes < 24 + 32) NumBytes = 24 + 32; + // Adjust the stack pointer for the new arguments... - BuildMI(BB, PPC32::ADJCALLSTACKDOWN, 1).addSImm(NumBytes); + // These functions are automatically eliminated by the prolog/epilog pass + BuildMI(BB, PPC32::ADJCALLSTACKDOWN, 1).addImm(NumBytes); // Arguments go on the stack in reverse order, as specified by the ABI. // Offset to the paramater area on the stack is 24. - unsigned ArgOffset = 24; int GPR_remaining = 8, FPR_remaining = 13; unsigned GPR_idx = 0, FPR_idx = 0; static const unsigned GPR[] = { @@ -1573,12 +1576,14 @@ void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI, GPR_idx++; } } else { - BuildMI(BB, PPC32::ADJCALLSTACKDOWN, 1).addSImm(0); + BuildMI(BB, PPC32::ADJCALLSTACKDOWN, 1).addImm(0); } BuildMI(BB, PPC32::IMPLICIT_DEF, 0, PPC32::LR); BB->push_back(CallMI); - BuildMI(BB, PPC32::ADJCALLSTACKUP, 1).addSImm(NumBytes); + + // These functions are automatically eliminated by the prolog/epilog pass + BuildMI(BB, PPC32::ADJCALLSTACKUP, 1).addImm(NumBytes); // If there is a return value, scavenge the result from the location the call // leaves it in... 
@@ -1592,11 +1597,11 @@ void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI, // Integral results are in r3 BuildMI(BB, PPC32::OR, 2, Ret.Reg).addReg(PPC32::R3).addReg(PPC32::R3); break; - case cFP32: // Floating-point return values live in f1 + case cFP32: // Floating-point return values live in f1 case cFP64: BuildMI(BB, PPC32::FMR, 1, Ret.Reg).addReg(PPC32::F1); break; - case cLong: // Long values are in r3 hi:r4 lo + case cLong: // Long values are in r3:r4 BuildMI(BB, PPC32::OR, 2, Ret.Reg).addReg(PPC32::R3).addReg(PPC32::R3); BuildMI(BB, PPC32::OR, 2, Ret.Reg+1).addReg(PPC32::R4).addReg(PPC32::R4); break; diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index ceb6f36aab..ca5abb2a8c 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -85,7 +85,10 @@ bool PowerPCTargetMachine::addPassesToEmitAssembly(PassManager &PM, if (PrintMachineCode) PM.add(createMachineFunctionPrinterPass(&std::cerr)); - PM.add(createPrologEpilogCodeInserter()); + // I want a PowerPC specific prolog/epilog code inserter so I can put the + // fills/spills in the right spots. 
+ PM.add(createPowerPCPEI()); + // Must run branch selection immediately preceding the printer PM.add(createPPCBranchSelectionPass()); PM.add(createPPCCodePrinterPass(Out, *this)); diff --git a/lib/Target/PowerPC/PowerPC.td b/lib/Target/PowerPC/PowerPC.td index f10621b332..a92bae812d 100644 --- a/lib/Target/PowerPC/PowerPC.td +++ b/lib/Target/PowerPC/PowerPC.td @@ -33,11 +33,11 @@ def PowerPC : Target { let PointerType = i32; // According to the Mach-O Runtime ABI, these regs are nonvolatile across - // calls: put LR in here someday when we can Do The Right Thing + // calls let CalleeSavedRegisters = [R1, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, - F30, F31, CR2, CR3, CR4]; + F30, F31, CR2, CR3, CR4, LR]; // Pull in Instruction Info: let InstructionSet = PowerPCInstrInfo; diff --git a/lib/Target/PowerPC/PowerPCISelSimple.cpp b/lib/Target/PowerPC/PowerPCISelSimple.cpp index 8da6b9d705..35272380d6 100644 --- a/lib/Target/PowerPC/PowerPCISelSimple.cpp +++ b/lib/Target/PowerPC/PowerPCISelSimple.cpp @@ -646,11 +646,8 @@ void ISel::copyConstantToRegister(MachineBasicBlock *MBB, /// LoadArgumentsToVirtualRegs - Load all of the arguments to this function from /// the stack into virtual registers. -/// -/// FIXME: When we can calculate which args are coming in via registers -/// source them from there instead. void ISel::LoadArgumentsToVirtualRegs(Function &Fn) { - unsigned ArgOffset = 20; // FIXME why is this not 24? 
+ unsigned ArgOffset = 24; unsigned GPR_remaining = 8; unsigned FPR_remaining = 13; unsigned GPR_idx = 0, FPR_idx = 0; @@ -1412,8 +1409,10 @@ void ISel::visitBranchInst(BranchInst &BI) { /// <http://developer.apple.com/documentation/DeveloperTools/Conceptual/MachORuntime/2rt_powerpc_abi/chapter_9_section_5.html> void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI, const std::vector<ValueRecord> &Args, bool isVarArg) { - // Count how many bytes are to be pushed on the stack... - unsigned NumBytes = 0; + // Count how many bytes are to be pushed on the stack, including the linkage + // area, and parameter passing area. + unsigned NumBytes = 24; + unsigned ArgOffset = 24; if (!Args.empty()) { for (unsigned i = 0, e = Args.size(); i != e; ++i) @@ -1430,12 +1429,16 @@ void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI, default: assert(0 && "Unknown class!"); } + // Just to be safe, we'll always reserve the full 32 bytes worth of + // argument passing space in case any called code gets funky on us. + if (NumBytes < 24 + 32) NumBytes = 24 + 32; + // Adjust the stack pointer for the new arguments... - BuildMI(BB, PPC32::ADJCALLSTACKDOWN, 1).addSImm(NumBytes); + // These functions are automatically eliminated by the prolog/epilog pass + BuildMI(BB, PPC32::ADJCALLSTACKDOWN, 1).addImm(NumBytes); // Arguments go on the stack in reverse order, as specified by the ABI. // Offset to the paramater area on the stack is 24. 
- unsigned ArgOffset = 24; int GPR_remaining = 8, FPR_remaining = 13; unsigned GPR_idx = 0, FPR_idx = 0; static const unsigned GPR[] = { @@ -1573,12 +1576,14 @@ void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI, GPR_idx++; } } else { - BuildMI(BB, PPC32::ADJCALLSTACKDOWN, 1).addSImm(0); + BuildMI(BB, PPC32::ADJCALLSTACKDOWN, 1).addImm(0); } BuildMI(BB, PPC32::IMPLICIT_DEF, 0, PPC32::LR); BB->push_back(CallMI); - BuildMI(BB, PPC32::ADJCALLSTACKUP, 1).addSImm(NumBytes); + + // These functions are automatically eliminated by the prolog/epilog pass + BuildMI(BB, PPC32::ADJCALLSTACKUP, 1).addImm(NumBytes); // If there is a return value, scavenge the result from the location the call // leaves it in... @@ -1592,11 +1597,11 @@ void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI, // Integral results are in r3 BuildMI(BB, PPC32::OR, 2, Ret.Reg).addReg(PPC32::R3).addReg(PPC32::R3); break; - case cFP32: // Floating-point return values live in f1 + case cFP32: // Floating-point return values live in f1 case cFP64: BuildMI(BB, PPC32::FMR, 1, Ret.Reg).addReg(PPC32::F1); break; - case cLong: // Long values are in r3 hi:r4 lo + case cLong: // Long values are in r3:r4 BuildMI(BB, PPC32::OR, 2, Ret.Reg).addReg(PPC32::R3).addReg(PPC32::R3); BuildMI(BB, PPC32::OR, 2, Ret.Reg+1).addReg(PPC32::R4).addReg(PPC32::R4); break; diff --git a/lib/Target/PowerPC/PowerPCPEI.cpp b/lib/Target/PowerPC/PowerPCPEI.cpp new file mode 100644 index 0000000000..c2d47090e4 --- /dev/null +++ b/lib/Target/PowerPC/PowerPCPEI.cpp @@ -0,0 +1,326 @@ +//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass is responsible for finalizing the functions frame layout, saving +// callee saved registers, and for emitting prolog & epilog code for the +// function. +// +// This pass must be run after register allocation. After this pass is +// executed, it is illegal to construct MO_FrameIndex operands. +// +//===----------------------------------------------------------------------===// +// +// FIXME: The contents of this file should be merged with the target generic +// CodeGen/PrologEpilogInserter.cpp +// +//===----------------------------------------------------------------------===// + +#include "PowerPC.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/MRegisterInfo.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "Support/Debug.h" +using namespace llvm; + +namespace { + struct PPCPEI : public MachineFunctionPass { + const char *getPassName() const { + return "PowerPC Frame Finalization & Prolog/Epilog Insertion"; + } + + /// runOnMachineFunction - Insert prolog/epilog code and replace abstract + /// frame indexes with appropriate references. + /// + bool runOnMachineFunction(MachineFunction &Fn) { + RegsToSave.clear(); + StackSlots.clear(); + + // Scan the function for modified caller saved registers and insert spill + // code for any caller saved registers that are modified. Also calculate + // the MaxCallFrameSize and HasCalls variables for the function's frame + // information and eliminates call frame pseudo instructions. + calculateCallerSavedRegisters(Fn); + + // Calculate actual frame offsets for all of the abstract stack objects... + calculateFrameObjectOffsets(Fn); + + // Add prolog and epilog code to the function. 
+ insertPrologEpilogCode(Fn); + + // Add register spills and fills before prolog and after epilog so that in + // the event of a very large fixed size alloca, we don't have to do + // anything weird. + saveCallerSavedRegisters(Fn); + + // Replace all MO_FrameIndex operands with physical register references + // and actual offsets. + // + replaceFrameIndices(Fn); + return true; + } + + private: + std::vector<unsigned> RegsToSave; + std::vector<int> StackSlots; + + void calculateCallerSavedRegisters(MachineFunction &Fn); + void saveCallerSavedRegisters(MachineFunction &Fn); + void calculateFrameObjectOffsets(MachineFunction &Fn); + void replaceFrameIndices(MachineFunction &Fn); + void insertPrologEpilogCode(MachineFunction &Fn); + }; +} + + +/// createPowerPCPEI - This function returns a pass that inserts +/// prolog and epilog code, and eliminates abstract frame references. +/// +FunctionPass *llvm::createPowerPCPEI() { return new PPCPEI(); } + + +/// calculateCallerSavedRegisters - Scan the function for modified caller saved +/// registers. Also calculate the MaxCallFrameSize and HasCalls variables for +/// the function's frame information and eliminates call frame pseudo +/// instructions. +/// +void PPCPEI::calculateCallerSavedRegisters(MachineFunction &Fn) { + const MRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + const TargetFrameInfo &FrameInfo = *Fn.getTarget().getFrameInfo(); + + // Get the callee saved register list... + const unsigned *CSRegs = RegInfo->getCalleeSaveRegs(); + + // Get the function call frame set-up and tear-down instruction opcode + int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode(); + int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode(); + + // Early exit for targets which have no callee saved registers and no call + // frame setup/destroy pseudo instructions. 
+ if ((CSRegs == 0 || CSRegs[0] == 0) && + FrameSetupOpcode == -1 && FrameDestroyOpcode == -1) + return; + + // This bitset contains an entry for each physical register for the target... + std::vector<bool> ModifiedRegs(RegInfo->getNumRegs()); + unsigned MaxCallFrameSize = 0; + bool HasCalls = false; + + for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) + if (I->getOpcode() == FrameSetupOpcode || + I->getOpcode() == FrameDestroyOpcode) { + assert(I->getNumOperands() == 1 && "Call Frame Setup/Destroy Pseudo" + " instructions should have a single immediate argument!"); + unsigned Size = I->getOperand(0).getImmedValue(); + if (Size > MaxCallFrameSize) MaxCallFrameSize = Size; + HasCalls = true; + RegInfo->eliminateCallFramePseudoInstr(Fn, *BB, I++); + } else { + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + MachineOperand &MO = I->getOperand(i); + if (MO.isRegister() && MO.isDef()) { + assert(MRegisterInfo::isPhysicalRegister(MO.getReg()) && + "Register allocation must be performed!"); + ModifiedRegs[MO.getReg()] = true; // Register is modified + } + } + ++I; + } + + MachineFrameInfo *FFI = Fn.getFrameInfo(); + FFI->setHasCalls(HasCalls); + FFI->setMaxCallFrameSize(MaxCallFrameSize); + + // Now figure out which *callee saved* registers are modified by the current + // function, thus needing to be saved and restored in the prolog/epilog. + // + for (unsigned i = 0; CSRegs[i]; ++i) { + unsigned Reg = CSRegs[i]; + if (ModifiedRegs[Reg]) { + RegsToSave.push_back(Reg); // If modified register... + } else { + for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); + *AliasSet; ++AliasSet) { // Check alias registers too... + if (ModifiedRegs[*AliasSet]) { + RegsToSave.push_back(Reg); + break; + } + } + } + } + + // FIXME: should we sort the regs to save so that we always get the regs in + // the correct order? 
+ + // Now that we know which registers need to be saved and restored, allocate + // stack slots for them. + int Offset = 0; + for (unsigned i = 0, e = RegsToSave.size(); i != e; ++i) { + unsigned RegSize = RegInfo->getRegClass(RegsToSave[i])->getSize(); + int FrameIdx; + + if (RegsToSave[i] == PPC32::LR) { + FrameIdx = FFI->CreateFixedObject(RegSize, 8); // LR lives at +8 + } else { + Offset -= RegSize; + FrameIdx = FFI->CreateFixedObject(RegSize, Offset); + } + StackSlots.push_back(FrameIdx); + } +} + + +/// saveCallerSavedRegisters - Insert spill code for any caller saved registers +/// that are modified in the function. +/// +void PPCPEI::saveCallerSavedRegisters(MachineFunction &Fn) { + // Early exit if no caller saved registers are modified! + if (RegsToSave.empty()) + return; + + const MRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + + // Now that we have a stack slot for each register to be saved, insert spill + // code into the entry block... + MachineBasicBlock *MBB = Fn.begin(); + MachineBasicBlock::iterator I = MBB->begin(); + for (unsigned i = 0, e = RegsToSave.size(); i != e; ++i) { + const TargetRegisterClass *RC = RegInfo->getRegClass(RegsToSave[i]); + // Insert the spill to the stack frame... + RegInfo->storeRegToStackSlot(*MBB, I, RegsToSave[i], StackSlots[i], RC); + } + + // Add code to restore the callee-save registers in each exiting block. 
+ const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); + for (MachineFunction::iterator FI = Fn.begin(), E = Fn.end(); FI != E; ++FI) { + // If last instruction is a return instruction, add an epilogue + if (!FI->empty() && TII.isReturn(FI->back().getOpcode())) { + MBB = FI; + I = MBB->end(); --I; + + for (unsigned i = 0, e = RegsToSave.size(); i != e; ++i) { + const TargetRegisterClass *RC = RegInfo->getRegClass(RegsToSave[i]); + RegInfo->loadRegFromStackSlot(*MBB, I, RegsToSave[i],StackSlots[i], RC); + --I; // Insert in reverse order + } + } + } +} + + +/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the +/// abstract stack objects... +/// +void PPCPEI::calculateFrameObjectOffsets(MachineFunction &Fn) { + const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo(); + + bool StackGrowsDown = + TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; + + // Loop over all of the stack objects, assigning sequential addresses... + MachineFrameInfo *FFI = Fn.getFrameInfo(); + + unsigned StackAlignment = TFI.getStackAlignment(); + + // Start at the beginning of the local area. + // The Offset is the distance from the stack top in the direction + // of stack growth -- so it's always positive. + int Offset = TFI.getOffsetOfLocalArea(); + if (StackGrowsDown) + Offset = -Offset; + assert(Offset >= 0 + && "Local area offset should be in direction of stack growth"); + + // If there are fixed sized objects that are preallocated in the local area, + // non-fixed objects can't be allocated right at the start of local area. + // We currently don't support filling in holes in between fixed sized objects, + // so we adjust 'Offset' to point to the end of last fixed sized + // preallocated object. + for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { + int FixedOff; + if (StackGrowsDown) { + // The maximum distance from the stack pointer is at lower address of + // the object -- which is given by offset. 
For down growing stack + // the offset is negative, so we negate the offset to get the distance. + FixedOff = -FFI->getObjectOffset(i); + } else { + // The maximum distance from the start pointer is at the upper + // address of the object. + FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i); + } + if (FixedOff > Offset) Offset = FixedOff; + } + + for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { + // If stack grows down, we need to add the object's size to find the + // lowest address of the object. + if (StackGrowsDown) + Offset += FFI->getObjectSize(i); + + unsigned Align = FFI->getObjectAlignment(i); + assert(Align <= StackAlignment && "Cannot align stack object to higher " + "alignment boundary than the stack itself!"); + Offset = (Offset+Align-1)/Align*Align; // Adjust to Alignment boundary... + + if (StackGrowsDown) { + FFI->setObjectOffset(i, -Offset); // Set the computed offset + } else { + FFI->setObjectOffset(i, Offset); + Offset += FFI->getObjectSize(i); + } + } + + // Set the final value of the stack pointer... + FFI->setStackSize(Offset); +} + + +/// insertPrologEpilogCode - Emit the target's prolog code for the function, +/// then emit epilog code in every block whose last instruction is a return. +/// +void PPCPEI::insertPrologEpilogCode(MachineFunction &Fn) { + // Add prologue to the function... + Fn.getTarget().getRegisterInfo()->emitPrologue(Fn); + + // Add epilogue to restore the callee-save registers in each exiting block + const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { + // If last instruction is a return instruction, add an epilogue + if (!I->empty() && TII.isReturn(I->back().getOpcode())) + Fn.getTarget().getRegisterInfo()->emitEpilogue(Fn, *I); + } +} + + +/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical +/// register references and actual offsets. 
+/// +void PPCPEI::replaceFrameIndices(MachineFunction &Fn) { + if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do? + + const TargetMachine &TM = Fn.getTarget(); + assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); + const MRegisterInfo &MRI = *TM.getRegisterInfo(); + + for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (I->getOperand(i).isFrameIndex()) { + // If this instruction has a FrameIndex operand, we need to use that + // target machine register info object to eliminate it. + MRI.eliminateFrameIndex(Fn, I); + break; + } +} diff --git a/lib/Target/PowerPC/PowerPCRegisterInfo.cpp b/lib/Target/PowerPC/PowerPCRegisterInfo.cpp index 2095084741..42f2d3e9e8 100644 --- a/lib/Target/PowerPC/PowerPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PowerPCRegisterInfo.cpp @@ -64,11 +64,12 @@ PowerPCRegisterInfo::storeRegToStackSlot(MachineBasicBlock &MBB, unsigned OC = Opcode[getIdx(RC)]; if (SrcReg == PPC32::LR) { MBB.insert(MI, BuildMI(PPC32::MFLR, 0, PPC32::R0)); - OC = PPC32::STW; - SrcReg = PPC32::R0; + MBB.insert(MI, addFrameReference(BuildMI(OC,3).addReg(PPC32::R0),FrameIdx)); + return 2; + } else { + MBB.insert(MI, addFrameReference(BuildMI(OC, 3).addReg(SrcReg),FrameIdx)); + return 1; } - MBB.insert(MI, addFrameReference(BuildMI(OC, 3).addReg(SrcReg),FrameIdx)); - return 1; } int @@ -80,17 +81,14 @@ PowerPCRegisterInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, PPC32::LBZ, PPC32::LHZ, PPC32::LWZ, PPC32::LFS, PPC32::LFD }; unsigned OC = Opcode[getIdx(RC)]; - bool LoadLR = false; if (DestReg == PPC32::LR) { - DestReg = PPC32::R0; - LoadLR = true; - OC = PPC32::LWZ; - } - MBB.insert(MI, addFrameReference(BuildMI(OC, 2, DestReg), FrameIdx)); - if (LoadLR) + MBB.insert(MI, addFrameReference(BuildMI(OC, 2, PPC32::R0), FrameIdx)); MBB.insert(MI, BuildMI(PPC32::MTLR, 
1).addReg(PPC32::R0)); - - return 1; + return 2; + } else { + MBB.insert(MI, addFrameReference(BuildMI(OC, 2, DestReg), FrameIdx)); + return 1; + } } int PowerPCRegisterInfo::copyRegToReg(MachineBasicBlock &MBB, @@ -128,10 +126,10 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { if (hasFP(MF)) { // If we have a frame pointer, convert as follows: - // adjcallstackdown instruction => 'sub r1, r1, <amt>' and - // adjcallstackup instruction => 'add r1, r1, <amt>' + // ADJCALLSTACKDOWN -> addi r1, r1, -amount + // ADJCALLSTACKUP -> addi r1, r1, amount MachineInstr *Old = I; - int Amount = Old->getOperand(0).getImmedValue(); + unsigned Amount = Old->getOperand(0).getImmedValue(); if (Amount != 0) { // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next @@ -174,15 +172,20 @@ PowerPCRegisterInfo::eliminateFrameIndex(MachineFunction &MF, // Take into account whether it's an add or mem instruction unsigned OffIdx = (i == 2) ? 1 : 2; + // Now add the frame object offset to the offset from r1. int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + - MI.getOperand(OffIdx).getImmedValue()+4; + MI.getOperand(OffIdx).getImmedValue(); - if (!hasFP(MF)) + // Fixed offsets have a negative frame index. Fixed negative offsets denote + // spilled callee save regs. Fixed positive offset is the va_start offset, + // and needs to be added to the amount we decremented the stack pointer. + // Positive frame indices are regular offsets from the stack pointer, and + // also need the stack size added. 
+ if (FrameIndex >= 0 || (FrameIndex < 0 && Offset >= 24)) Offset += MF.getFrameInfo()->getStackSize(); MI.SetMachineOperandConst(OffIdx,MachineOperand::MO_SignExtendedImmed,Offset); - DEBUG(std::cerr << "offset = " << Offset << std::endl); } @@ -195,36 +198,25 @@ void PowerPCRegisterInfo::emitPrologue(MachineFunction &MF) const { // Get the number of bytes to allocate from the FrameInfo unsigned NumBytes = MFI->getStackSize(); - // If we have calls, save the LR value on the stack - if (MFI->hasCalls() || true) { - // When we have no frame pointer, we reserve argument space for call sites - // in the function immediately on entry to the current function. This - // eliminates the need for add/sub brackets around call sites. - NumBytes += MFI->getMaxCallFrameSize() + - 24 + // Predefined PowerPC link area - 32 + // Predefined PowerPC params area - 0 + // local variables - managed by LLVM - 0 * 4 + // volatile GPRs used - managed by LLVM - 0 * 8; // volatile FPRs used - managed by LLVM - - // Round the size to a multiple of the alignment (don't forget the 4 byte - // offset though). - unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); - NumBytes = ((NumBytes+4)+Align-1)/Align*Align - 4; - - // Store the incoming LR so it is preserved across calls - MI = BuildMI(PPC32::MFLR, 0, PPC32::R0); - MBB.insert(MBBI, MI); - MI = BuildMI(PPC32::STW, 3).addReg(PPC32::R0).addSImm(8).addReg(PPC32::R1); - MBB.insert(MBBI, MI); + // If we have calls, we cannot use the red zone to store callee save registers + // and we must set up a stack frame, so calculate the necessary size here. + if (MFI->hasCalls()) { + // We reserve argument space for call sites in the function immediately on + // entry to the current function. This eliminates the need for add/sub + // brackets around call sites. + NumBytes += MFI->getMaxCallFrameSize(); } - // Update frame info to pretend that this is part of the stack... 
- MFI->setStackSize(NumBytes); - // Do we need to allocate space on the stack? if (NumBytes == 0) return; + // Round the size to a multiple of the alignment + unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); + NumBytes = (NumBytes+Align-1)/Align*Align; + + // Update frame info to pretend that this is part of the stack... + MFI->setStackSize(NumBytes); + // adjust stack pointer: r1 -= numbytes if (NumBytes <= 32768) { MI = BuildMI(PPC32::STWU, 3).addReg(PPC32::R1).addSImm(-NumBytes) @@ -254,35 +246,9 @@ void PowerPCRegisterInfo::emitEpilogue(MachineFunction &MF, // Get the number of bytes allocated from the FrameInfo... unsigned NumBytes = MFI->getStackSize(); - if (NumBytes == 0 && (MFI->hasCalls() || true)) { - // Don't need to adjust the stack pointer, just gotta fix up the LR - MI = BuildMI(PPC32::LWZ, 2,PPC32::R0).addSImm(NumBytes+8).addReg(PPC32::R1); - MBB.insert(MBBI, MI); - MI = BuildMI(PPC32::MTLR, 1).addReg(PPC32::R0); - MBB.insert(MBBI, MI); - } else if (NumBytes <= 32767-8) { - // We're within range to load the return address and restore the stack - // pointer with immediate offsets only. - if (MFI->hasCalls() || true) { - MI = BuildMI(PPC32::LWZ,2,PPC32::R0).addSImm(NumBytes+8).addReg(PPC32::R1); - MBB.insert(MBBI, MI); - MI = BuildMI(PPC32::MTLR, 1).addReg(PPC32::R0); - MBB.insert(MBBI, MI); - } - MI = BuildMI(PPC32::ADDI, 2, PPC32::R1).addReg(PPC32::R1).addSImm(NumBytes); - MBB.insert(MBBI, MI); - } else { + if (NumBytes != 0) { MI = BuildMI(PPC32::LWZ, 2, PPC32::R1).addSImm(0).addReg(PPC32::R1); MBB.insert(MBBI, MI); - // We're not within range to load the return address with an immediate - // offset before restoring the stack pointer, so do it after from its spot - // in the linkage area. 
- if (MFI->hasCalls() || true) { - MI = BuildMI(PPC32::LWZ, 2, PPC32::R0).addSImm(8).addReg(PPC32::R1); - MBB.insert(MBBI, MI); - MI = BuildMI(PPC32::MTLR, 1).addReg(PPC32::R0); - MBB.insert(MBBI, MI); - } } } diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index 3dfdbff9de..e762d94029 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -3,9 +3,6 @@ Currently unimplemented: * signed right shift of long by reg Current bugs: -* large fixed-size allocas not correct, although should - be closer to working. Added code in PPCRegisterInfo.cpp - to do >16bit subtractions to the stack pointer. * ulong to double. ahhh, here's the problem: floatdidf assumes signed longs. so if the high bit of a ulong just happens to be set, you get the wrong sign. The fix for this @@ -25,19 +22,14 @@ Current bugs: * linking llvmg++ .s files with gcc instead of g++ Codegen improvements needed: -* no alias analysis causes us to generate slow code for Shootout/matrix +* PowerPCPEI.cpp needs to save/restore regs in the opposite order * setCondInst needs to know branchless versions of seteq/setne/etc * cast elimination pass (uint -> sbyte -> short, kill the byte -> short) * should hint to the branch select pass that it doesn't need to print the second unconditional branch, so we don't end up with things like: -.LBBl42__2E_expand_function_8_21: ; LeafBlock37 - cmplwi cr0, r29, 11 - bne cr0, $+8 b .LBBl42__2E_expand_function_8_674 ; loopentry.24 b .LBBl42__2E_expand_function_8_42 ; NewDefault b .LBBl42__2E_expand_function_8_42 ; NewDefault -* conditional restore of link register (tricky, temporarily backed out - part of first attempt) Current hacks: * lazy insert of GlobalBaseReg definition at front of first MBB @@ -49,7 +41,6 @@ Current hacks: Currently failing tests: * SingleSource `- Regression - | `- 2003-05-22-VarSizeArray | `- casts (ulong to fp failure) `- Benchmarks | `- Shootout-C++ : most programs fail, miscompilations |