aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/PowerPC/PPCRegisterInfo.cpp
diff options
context:
space:
mode:
authorArnold Schwaighofer <arnold.schwaighofer@gmail.com>2008-04-30 09:16:33 +0000
committerArnold Schwaighofer <arnold.schwaighofer@gmail.com>2008-04-30 09:16:33 +0000
commit30e62c098b5841259f8026df1c5c45c7c1182a38 (patch)
tree7a4da70d35c785bd6bd03768b4bf7ba3c42e24d2 /lib/Target/PowerPC/PPCRegisterInfo.cpp
parent4f06a0f6d583691bda2cf6e37d0c35e128bc4b8d (diff)
Tail call optimization improvements:
Move platform independent code (lowering of possibly overwritten arguments, check for tail call optimization eligibility) from target X86ISelectionLowering.cpp to TargetLowering.h and SelectionDAGISel.cpp. Initial PowerPC tail call implementation: Support ppc32 implemented and tested (passes my tests and test-suite llvm-test). Support ppc64 implemented and half tested (passes my tests). On ppc tail call optimization is performed if caller and callee are fastcc call is a tail call (in tail call position, call followed by ret) no variable argument lists or byval arguments option -tailcallopt is enabled Supported: * non pic tail calls on linux/darwin * module-local tail calls on linux(PIC/GOT)/darwin(PIC) * inter-module tail calls on darwin(PIC) If constraints are not met a normal call will be emitted. A test checking the argument lowering behaviour on x86-64 was added. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@50477 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/PowerPC/PPCRegisterInfo.cpp')
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp182
1 files changed, 173 insertions, 9 deletions
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 984021068b..d8ea207eca 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -19,6 +19,7 @@
#include "PPCRegisterInfo.h"
#include "PPCFrameInfo.h"
#include "PPCSubtarget.h"
+#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Type.h"
@@ -332,7 +333,8 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
//
static bool needsFP(const MachineFunction &MF) {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return NoFramePointerElim || MFI->hasVarSizedObjects();
+ return NoFramePointerElim || MFI->hasVarSizedObjects() ||
+ (PerformTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}
static bool spillsCR(const MachineFunction &MF) {
@@ -399,9 +401,42 @@ static bool MustSaveLR(const MachineFunction &MF) {
MF.getFrameInfo()->hasCalls();
}
+
+
void PPCRegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
+ if (PerformTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) {
+ // Add (actually substract) back the amount the callee popped on return.
+ if (int CalleeAmt = I->getOperand(1).getImm()) {
+ MachineInstr * New = NULL;
+ bool is64Bit = Subtarget.isPPC64();
+ CalleeAmt *= -1;
+ unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
+ unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
+
+ if (isInt16(CalleeAmt)) {
+ New = BuildMI(TII.get(ADDIInstr), StackReg).addReg(StackReg).
+ addImm(CalleeAmt);
+ MBB.insert(I, New);
+ } else {
+ MachineBasicBlock::iterator MBBI = I;
+ BuildMI(MBB, MBBI, TII.get(LISInstr), TmpReg)
+ .addImm(CalleeAmt >> 16);
+ BuildMI(MBB, MBBI, TII.get(ORIInstr), TmpReg)
+ .addReg(TmpReg, false, false, true)
+ .addImm(CalleeAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, TII.get(ADDInstr))
+ .addReg(StackReg)
+ .addReg(StackReg)
+ .addReg(TmpReg);
+ }
+ }
+ }
// Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
MBB.erase(I);
}
@@ -924,6 +959,13 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
FI->setFramePointerSaveIndex(FPSI);
}
+ // Reserve stack space to move the linkage area to in case of a tail call.
+ int TCSPDelta = 0;
+ if (PerformTailCallOpt && (TCSPDelta=FI->getTailCallSPDelta()) < 0) {
+ int AddFPOffsetAmount = IsELF32_ABI ? -4 : 0;
+ MF.getFrameInfo()->CreateFixedObject( -1 * TCSPDelta,
+ AddFPOffsetAmount + TCSPDelta);
+ }
// Reserve a slot closest to SP or frame pointer if we have a dynalloc or
// a large stack, which will require scavenging a register to materialize a
// large offset.
@@ -1160,7 +1202,15 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = prior(MBB.end());
- assert(MBBI->getOpcode() == PPC::BLR &&
+ unsigned RetOpcode = MBBI->getOpcode();
+
+ assert( (RetOpcode == PPC::BLR ||
+ RetOpcode == PPC::TCRETURNri ||
+ RetOpcode == PPC::TCRETURNdi ||
+ RetOpcode == PPC::TCRETURNai ||
+ RetOpcode == PPC::TCRETURNri8 ||
+ RetOpcode == PPC::TCRETURNdi8 ||
+ RetOpcode == PPC::TCRETURNai8) &&
"Can only insert epilog into returning blocks");
// Get alignment info so we know how to restore r1
@@ -1169,7 +1219,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
unsigned MaxAlign = MFI->getMaxAlignment();
// Get the number of bytes allocated from the FrameInfo.
- unsigned FrameSize = MFI->getStackSize();
+ int FrameSize = MFI->getStackSize();
// Get processor type.
bool IsPPC64 = Subtarget.isPPC64();
@@ -1183,19 +1233,75 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, IsMachoABI);
int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, IsMachoABI);
+ bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
+ RetOpcode == PPC::TCRETURNdi ||
+ RetOpcode == PPC::TCRETURNai ||
+ RetOpcode == PPC::TCRETURNri8 ||
+ RetOpcode == PPC::TCRETURNdi8 ||
+ RetOpcode == PPC::TCRETURNai8;
+
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+
+ if (UsesTCRet) {
+ int MaxTCRetDelta = FI->getTailCallSPDelta();
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+ assert( StackAdjust.isImmediate() && "Expecting immediate value.");
+ // Adjust stack pointer.
+ int StackAdj = StackAdjust.getImm();
+ int Delta = StackAdj - MaxTCRetDelta;
+ assert((Delta >= 0) && "Delta must be positive");
+ if (MaxTCRetDelta>0)
+ FrameSize += (StackAdj +Delta);
+ else
+ FrameSize += StackAdj;
+ }
+
if (FrameSize) {
// The loaded (or persistent) stack pointer value is offset by the 'stwu'
// on entry to the function. Add this offset back now.
- if (!Subtarget.isPPC64()) {
- if (isInt16(FrameSize) && (!ALIGN_STACK || TargetAlign >= MaxAlign) &&
- !MFI->hasVarSizedObjects()) {
- BuildMI(MBB, MBBI, TII.get(PPC::ADDI), PPC::R1)
- .addReg(PPC::R1).addImm(FrameSize);
+ if (!IsPPC64) {
+ // If this function contained a fastcc call and PerformTailCallOpt is
+ // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
+ // call which invalidates the stack pointer value in SP(0). So we use the
+ // value of R31 in this case.
+ if (FI->hasFastCall() && isInt16(FrameSize)) {
+ assert(hasFP(MF) && "Expecting a valid the frame pointer.");
+ BuildMI(MBB, MBBI, TII.get(PPC::ADDI), PPC::R1)
+ .addReg(PPC::R31).addImm(FrameSize);
+ } else if(FI->hasFastCall()) {
+ BuildMI(MBB, MBBI, TII.get(PPC::LIS), PPC::R0)
+ .addImm(FrameSize >> 16);
+ BuildMI(MBB, MBBI, TII.get(PPC::ORI), PPC::R0)
+ .addReg(PPC::R0, false, false, true)
+ .addImm(FrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, TII.get(PPC::ADD4))
+ .addReg(PPC::R1)
+ .addReg(PPC::R31)
+ .addReg(PPC::R0);
+ } else if (isInt16(FrameSize) &&
+ (!ALIGN_STACK || TargetAlign >= MaxAlign) &&
+ !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, TII.get(PPC::ADDI), PPC::R1)
+ .addReg(PPC::R1).addImm(FrameSize);
} else {
BuildMI(MBB, MBBI, TII.get(PPC::LWZ),PPC::R1).addImm(0).addReg(PPC::R1);
}
} else {
- if (isInt16(FrameSize) && TargetAlign >= MaxAlign &&
+ if (FI->hasFastCall() && isInt16(FrameSize)) {
+ assert(hasFP(MF) && "Expecting a valid the frame pointer.");
+ BuildMI(MBB, MBBI, TII.get(PPC::ADDI8), PPC::X1)
+ .addReg(PPC::X31).addImm(FrameSize);
+ } else if(FI->hasFastCall()) {
+ BuildMI(MBB, MBBI, TII.get(PPC::LIS8), PPC::X0)
+ .addImm(FrameSize >> 16);
+ BuildMI(MBB, MBBI, TII.get(PPC::ORI8), PPC::X0)
+ .addReg(PPC::X0, false, false, true)
+ .addImm(FrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, TII.get(PPC::ADD8))
+ .addReg(PPC::X1)
+ .addReg(PPC::X31)
+ .addReg(PPC::X0);
+ } else if (isInt16(FrameSize) && TargetAlign >= MaxAlign &&
!MFI->hasVarSizedObjects()) {
BuildMI(MBB, MBBI, TII.get(PPC::ADDI8), PPC::X1)
.addReg(PPC::X1).addImm(FrameSize);
@@ -1228,6 +1334,64 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
if (UsesLR)
BuildMI(MBB, MBBI, TII.get(PPC::MTLR)).addReg(PPC::R0);
}
+
+ // Callee pop calling convention. Pop parameter/linkage area. Used for tail
+ // call optimization
+ if (PerformTailCallOpt && RetOpcode == PPC::BLR &&
+ MF.getFunction()->getCallingConv() == CallingConv::Fast) {
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned CallerAllocatedAmt = FI->getMinReservedArea();
+ unsigned StackReg = IsPPC64 ? PPC::X1 : PPC::R1;
+ unsigned FPReg = IsPPC64 ? PPC::X31 : PPC::R31;
+ unsigned TmpReg = IsPPC64 ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = IsPPC64 ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = IsPPC64 ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = IsPPC64 ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = IsPPC64 ? PPC::ORI8 : PPC::ORI;
+
+ if (CallerAllocatedAmt && isInt16(CallerAllocatedAmt)) {
+ BuildMI(MBB, MBBI, TII.get(ADDIInstr), StackReg)
+ .addReg(StackReg).addImm(CallerAllocatedAmt);
+ } else {
+ BuildMI(MBB, MBBI, TII.get(LISInstr), TmpReg)
+ .addImm(CallerAllocatedAmt >> 16);
+ BuildMI(MBB, MBBI, TII.get(ORIInstr), TmpReg)
+ .addReg(TmpReg, false, false, true)
+ .addImm(CallerAllocatedAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, TII.get(ADDInstr))
+ .addReg(StackReg)
+ .addReg(FPReg)
+ .addReg(TmpReg);
+ }
+ } else if (RetOpcode == PPC::TCRETURNdi) {
+ MBBI = prior(MBB.end());
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, TII.get(PPC::TAILB)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri) {
+ MBBI = prior(MBB.end());
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ assert(JumpTarget.isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, TII.get(PPC::TAILBCTR));
+ } else if (RetOpcode == PPC::TCRETURNai) {
+ MBBI = prior(MBB.end());
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
+ } else if (RetOpcode == PPC::TCRETURNdi8) {
+ MBBI = prior(MBB.end());
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, TII.get(PPC::TAILB8)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri8) {
+ MBBI = prior(MBB.end());
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ assert(JumpTarget.isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, TII.get(PPC::TAILBCTR8));
+ } else if (RetOpcode == PPC::TCRETURNai8) {
+ MBBI = prior(MBB.end());
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
+ }
}
unsigned PPCRegisterInfo::getRARegister() const {