aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Arnold Schwaighofer <arnold.schwaighofer@gmail.com> 2008-04-30 09:16:33 +0000
committer Arnold Schwaighofer <arnold.schwaighofer@gmail.com> 2008-04-30 09:16:33 +0000
commit 30e62c098b5841259f8026df1c5c45c7c1182a38 (patch)
tree 7a4da70d35c785bd6bd03768b4bf7ba3c42e24d2
parent 4f06a0f6d583691bda2cf6e37d0c35e128bc4b8d (diff)
Tail call optimization improvements:
Move platform independent code (lowering of possibly overwritten arguments, check for tail call optimization eligibility) from target X86ISelLowering.cpp to TargetLowering.h and SelectionDAGISel.cpp. Initial PowerPC tail call implementation: Support for ppc32 is implemented and tested (passes my tests and test-suite llvm-test). Support for ppc64 is implemented and half tested (passes my tests). On ppc, tail call optimization is performed if: caller and callee are fastcc; the call is a tail call (in tail call position, call followed by ret); there are no variable argument lists or byval arguments; and the option -tailcallopt is enabled. Supported: * non-PIC tail calls on linux/darwin * module-local tail calls on linux(PIC/GOT)/darwin(PIC) * inter-module tail calls on darwin(PIC). If these constraints are not met, a normal call will be emitted. A test checking the argument lowering behaviour on x86-64 was added. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@50477 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/Target/TargetLowering.h30
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp78
-rw-r--r--lib/Target/PowerPC/PPCCodeEmitter.cpp3
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp571
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h27
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td48
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td63
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.h30
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp182
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.td4
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp197
-rw-r--r--lib/Target/X86/X86ISelLowering.h9
-rw-r--r--lib/Target/X86/X86InstrInfo.td1
-rw-r--r--test/CodeGen/PowerPC/tailcall1-64.ll11
-rw-r--r--test/CodeGen/PowerPC/tailcall1.ll11
-rw-r--r--test/CodeGen/PowerPC/tailcallpic1.ll14
16 files changed, 985 insertions, 294 deletions
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 9b4c8af95d..5f8c1c2ae4 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -1033,6 +1033,36 @@ public:
return false;
}
+ /// CheckTailCallReturnConstraints - Check whether the CALL node immediately
+ /// precedes the RET node and whether the return uses the result of the node
+ /// or is a void return. This function can be used by the target to determine
+ /// eligibility for tail call optimization.
+ static bool CheckTailCallReturnConstraints(SDOperand Call, SDOperand Ret) {
+ unsigned NumOps = Ret.getNumOperands();
+ if ((NumOps == 1 &&
+ (Ret.getOperand(0) == SDOperand(Call.Val,1) ||
+ Ret.getOperand(0) == SDOperand(Call.Val,0))) ||
+ (NumOps > 1 &&
+ Ret.getOperand(0) == SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
+ Ret.getOperand(1) == SDOperand(Call.Val,0)))
+ return true;
+ return false;
+ }
+
+ /// GetPossiblePreceedingTailCall - Return the preceding TailCallNodeOpCode
+ /// node if it exists, looking through a possible ISD::TokenFactor.
+ static SDOperand GetPossiblePreceedingTailCall(SDOperand Chain,
+ unsigned TailCallNodeOpCode) {
+ if (Chain.getOpcode() == TailCallNodeOpCode) {
+ return Chain;
+ } else if (Chain.getOpcode() == ISD::TokenFactor) {
+ if (Chain.getNumOperands() &&
+ Chain.getOperand(0).getOpcode() == TailCallNodeOpCode)
+ return Chain.getOperand(0);
+ }
+ return Chain;
+ }
+
/// CustomPromoteOperation - This callback is invoked for operations that are
/// unsupported by the target, are registered to use 'custom' lowering, and
/// whose type needs to be promoted.
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 530ffd754b..c5911e5d66 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -4612,6 +4612,40 @@ static void copyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
}
}
+/// IsFixedFrameObjectWithPosOffset - Return true if Op is a frame index node
+/// that refers to a fixed frame object whose object offset is >= 0.
+static bool
+IsFixedFrameObjectWithPosOffset(MachineFrameInfo * MFI, SDOperand Op) {
+ if (!isa<FrameIndexSDNode>(Op)) return false;
+
+ FrameIndexSDNode * FrameIdxNode = dyn_cast<FrameIndexSDNode>(Op);
+ int FrameIdx = FrameIdxNode->getIndex();
+ return MFI->isFixedObjectIndex(FrameIdx) &&
+ MFI->getObjectOffset(FrameIdx) >= 0;
+}
+
+/// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could
+/// possibly be overwritten when lowering the outgoing arguments in a tail
+/// call. The check is very conservative: values coming from FORMAL_ARGUMENTS,
+/// from a CopyFromReg of a virtual register, or from a LOAD (directly or via
+/// MERGE_VALUES) of a fixed frame object with offset >= 0 are all flagged.
+static bool IsPossiblyOverwrittenArgumentOfTailCall(SDOperand Op,
+ MachineFrameInfo * MFI) {
+ RegisterSDNode * OpReg = NULL;
+ if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS ||
+ (Op.getOpcode()== ISD::CopyFromReg &&
+ (OpReg = dyn_cast<RegisterSDNode>(Op.getOperand(1))) &&
+ (OpReg->getReg() >= TargetRegisterInfo::FirstVirtualRegister)) ||
+ (Op.getOpcode() == ISD::LOAD &&
+ IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(1))) ||
+ (Op.getOpcode() == ISD::MERGE_VALUES &&
+ Op.getOperand(Op.ResNo).getOpcode() == ISD::LOAD &&
+ IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(Op.ResNo).
+ getOperand(1))))
+ return true;
+ return false;
+}
+
/// CheckDAGForTailCallsAndFixThem - This Function looks for CALL nodes in the
/// DAG and fixes their tailcall attribute operand.
static void CheckDAGForTailCallsAndFixThem(SelectionDAG &DAG,
@@ -4636,19 +4670,51 @@ static void CheckDAGForTailCallsAndFixThem(SelectionDAG &DAG,
// eligible (no RET or the target rejects) the attribute is fixed to
// false. The TargetLowering::IsEligibleForTailCallOptimization function
// must correctly identify tail call optimizable calls.
- if (isMarkedTailCall &&
- (Ret==NULL ||
- !TLI.IsEligibleForTailCallOptimization(OpCall, OpRet, DAG))) {
+ if (!isMarkedTailCall) continue;
+ if (Ret==NULL ||
+ !TLI.IsEligibleForTailCallOptimization(OpCall, OpRet, DAG)) {
+ // Not eligible. Mark CALL node as non tail call.
SmallVector<SDOperand, 32> Ops;
unsigned idx=0;
- for(SDNode::op_iterator I =OpCall.Val->op_begin(),
- E=OpCall.Val->op_end(); I!=E; I++, idx++) {
+ for(SDNode::op_iterator I =OpCall.Val->op_begin(),
+ E = OpCall.Val->op_end(); I != E; I++, idx++) {
if (idx!=3)
Ops.push_back(*I);
- else
+ else
Ops.push_back(DAG.getConstant(false, TLI.getPointerTy()));
}
DAG.UpdateNodeOperands(OpCall, Ops.begin(), Ops.size());
+ } else {
+ // Look for tail call clobbered arguments. Emit a series of
+ // copyto/copyfrom virtual register nodes to protect them.
+ SmallVector<SDOperand, 32> Ops;
+ SDOperand Chain = OpCall.getOperand(0), InFlag;
+ unsigned idx=0;
+ for(SDNode::op_iterator I = OpCall.Val->op_begin(),
+ E = OpCall.Val->op_end(); I != E; I++, idx++) {
+ SDOperand Arg = *I;
+ if (idx > 4 && (idx % 2)) {
+ bool isByVal = cast<ARG_FLAGSSDNode>(OpCall.getOperand(idx+1))->
+ getArgFlags().isByVal();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (!isByVal &&
+ IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)) {
+ MVT::ValueType VT = Arg.getValueType();
+ unsigned VReg = MF.getRegInfo().
+ createVirtualRegister(TLI.getRegClassFor(VT));
+ Chain = DAG.getCopyToReg(Chain, VReg, Arg, InFlag);
+ InFlag = Chain.getValue(1);
+ Arg = DAG.getCopyFromReg(Chain, VReg, VT, InFlag);
+ Chain = Arg.getValue(1);
+ InFlag = Arg.getValue(2);
+ }
+ }
+ Ops.push_back(Arg);
+ }
+ // Link in chain of CopyTo/CopyFromReg.
+ Ops[0] = Chain;
+ DAG.UpdateNodeOperands(OpCall, Ops.begin(), Ops.size());
}
}
}
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 50c9664fbc..c0621a074f 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -143,7 +143,8 @@ int PPCCodeEmitter::getMachineOpValue(MachineInstr &MI, MachineOperand &MO) {
MO.isConstantPoolIndex() || MO.isJumpTableIndex()) {
unsigned Reloc = 0;
if (MI.getOpcode() == PPC::BL_Macho || MI.getOpcode() == PPC::BL8_Macho ||
- MI.getOpcode() == PPC::BL_ELF || MI.getOpcode() == PPC::BL8_ELF)
+ MI.getOpcode() == PPC::BL_ELF || MI.getOpcode() == PPC::BL8_ELF ||
+ MI.getOpcode() == PPC::TAILB || MI.getOpcode() == PPC::TAILB8)
Reloc = PPC::reloc_pcrel_bx;
else {
if (TM.getRelocationModel() == Reloc::PIC_) {
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index d3f410ed67..37b8e7bb11 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -26,9 +26,11 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
+#include "llvm/ParameterAttributes.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
@@ -412,6 +414,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::MTFSB1: return "PPCISD::MTFSB1";
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
case PPCISD::MTFSF: return "PPCISD::MTFSF";
+ case PPCISD::TAILCALL: return "PPCISD::TAILCALL";
+ case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
}
}
@@ -1317,6 +1321,20 @@ static const unsigned *GetFPR(const PPCSubtarget &Subtarget) {
return FPR;
}
+/// CalculateStackSlotSize - Calculates the size reserved for this argument on
+/// the stack: its (or its byval) size rounded up to a PtrByteSize multiple.
+static unsigned CalculateStackSlotSize(SDOperand Arg, SDOperand Flag,
+ bool isVarArg, unsigned PtrByteSize) {
+ MVT::ValueType ArgVT = Arg.getValueType();
+ ISD::ArgFlagsTy Flags = cast<ARG_FLAGSSDNode>(Flag)->getArgFlags();
+ unsigned ArgSize =MVT::getSizeInBits(ArgVT)/8;
+ if (Flags.isByVal())
+ ArgSize = Flags.getByValSize();
+ ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+
+ return ArgSize;
+}
+
SDOperand
PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op,
SelectionDAG &DAG,
@@ -1338,10 +1356,15 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op,
bool isPPC64 = PtrVT == MVT::i64;
bool isMachoABI = Subtarget.isMachoABI();
bool isELF32_ABI = Subtarget.isELF32_ABI();
+ // Potential tail calls could cause overwriting of argument stack slots.
+ unsigned CC = MF.getFunction()->getCallingConv();
+ bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast));
unsigned PtrByteSize = isPPC64 ? 8 : 4;
unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
-
+ // Area that is at least reserved in caller of this function.
+ unsigned MinReservedArea = ArgOffset;
+
static const unsigned GPR_32[] = { // 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
@@ -1426,7 +1449,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op,
// even GPR_idx value or to an even ArgOffset value.
SmallVector<SDOperand, 8> MemOps;
-
+ unsigned nAltivecParamsAtEnd = 0;
for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
SDOperand ArgVal;
bool needsLoad = false;
@@ -1440,6 +1463,23 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op,
unsigned CurArgOffset = ArgOffset;
+ // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
+ if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
+ ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
+ if (isVarArg || isPPC64) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
+ Op.getOperand(ArgNo+3),
+ isVarArg,
+ PtrByteSize);
+ } else nAltivecParamsAtEnd++;
+ } else
+ // Calculate min reserved area.
+ MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
+ Op.getOperand(ArgNo+3),
+ isVarArg,
+ PtrByteSize);
+
// FIXME alignment for ELF may not be right
// FIXME the codegen can be much improved in some cases.
// We do not have to keep everything in memory.
@@ -1614,7 +1654,8 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op,
// that we ran out of physical registers of the appropriate type.
if (needsLoad) {
int FI = MFI->CreateFixedObject(ObjSize,
- CurArgOffset + (ArgSize - ObjSize));
+ CurArgOffset + (ArgSize - ObjSize),
+ isImmutable);
SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
}
@@ -1622,6 +1663,25 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op,
ArgValues.push_back(ArgVal);
}
+ // Set the size that is at least reserved in caller of this function. Tail
+ // call optimized function's reserved stack space needs to be aligned so that
+ // taking the difference between two stack areas will result in an aligned
+ // stack.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ // Add the Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += 16*nAltivecParamsAtEnd;
+ }
+ MinReservedArea =
+ std::max(MinReservedArea,
+ PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
+ getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
+ FI->setMinReservedArea(MinReservedArea);
+
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
@@ -1720,6 +1780,131 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op,
return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
}
+/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
+/// linkage area.
+static unsigned
+CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
+ bool isPPC64,
+ bool isMachoABI,
+ bool isVarArg,
+ unsigned CC,
+ SDOperand Call,
+ unsigned &nAltivecParamsAtEnd) {
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, and parameter passing area. We start with 24/48 bytes, which is
+ // prereserved space for [SP][CR][LR][3 x unused].
+ unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
+ unsigned NumOps = (Call.getNumOperands() - 5) / 2;
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ // Add up all the space actually used.
+ // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
+ // they all go in registers, but we must reserve stack space for them for
+ // possible use by the caller. In varargs or 64-bit calls, parameters are
+ // assigned stack space in order, with padding so Altivec parameters are
+ // 16-byte aligned.
+ nAltivecParamsAtEnd = 0;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDOperand Arg = Call.getOperand(5+2*i);
+ SDOperand Flag = Call.getOperand(5+2*i+1);
+ MVT::ValueType ArgVT = Arg.getValueType();
+ // Varargs Altivec parameters are padded to a 16 byte boundary.
+ if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
+ ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
+ if (!isVarArg && !isPPC64) {
+ // Non-varargs Altivec parameters go after all the non-Altivec
+ // parameters; handle those later so we know how much padding we need.
+ nAltivecParamsAtEnd++;
+ continue;
+ }
+ // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
+ NumBytes = ((NumBytes+15)/16)*16;
+ }
+ NumBytes += CalculateStackSlotSize(Arg, Flag, isVarArg, PtrByteSize);
+ }
+
+ // Allow for Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ NumBytes = ((NumBytes+15)/16)*16;
+ NumBytes += 16*nAltivecParamsAtEnd;
+ }
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if it is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ NumBytes = std::max(NumBytes,
+ PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
+
+ // Tail call needs the stack to be aligned.
+ if (CC==CallingConv::Fast && PerformTailCallOpt) {
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
+ getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ NumBytes = (NumBytes + AlignMask) & ~AlignMask;
+ }
+
+ return NumBytes;
+}
+
+/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
+/// adjusted to accommodate the arguments for the tailcall.
+static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool IsTailCall,
+ unsigned ParamSize) {
+
+ if (!IsTailCall) return 0;
+
+ PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
+ unsigned CallerMinReservedArea = FI->getMinReservedArea();
+ int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
+ // Remember only if the new adjustment is bigger (SPDiff below stored delta).
+ if (SPDiff < FI->getTailCallSPDelta())
+ FI->setTailCallSPDelta(SPDiff);
+
+ return SPDiff;
+}
+
+/// IsEligibleForTailCallOptimization - Check to see whether the next instruction
+/// following the call is a return. A function is eligible if caller/callee
+/// calling conventions match, currently only fastcc supports tail calls, and
+/// the function CALL is immediately followed by a RET.
+bool
+PPCTargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
+ SDOperand Ret,
+ SelectionDAG& DAG) const {
+ // Variable argument functions are not supported.
+ if (!PerformTailCallOpt ||
+ cast<ConstantSDNode>(Call.getOperand(2))->getValue() != 0) return false;
+
+ if (CheckTailCallReturnConstraints(Call, Ret)) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned CallerCC = MF.getFunction()->getCallingConv();
+ unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
+ if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
+ // Functions containing by val parameters are not supported.
+ for (unsigned i = 0; i != ((Call.getNumOperands()-5)/2); i++) {
+ ISD::ArgFlagsTy Flags = cast<ARG_FLAGSSDNode>(Call.getOperand(5+2*i+1))
+ ->getArgFlags();
+ if (Flags.isByVal()) return false;
+ }
+
+ SDOperand Callee = Call.getOperand(4);
+ // Non PIC/GOT tail calls are supported.
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return true;
+
+ // At the moment we can only do local tail calls (in same module, hidden
+ // or protected) if we are generating PIC.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ return G->getGlobal()->hasHiddenVisibility()
+ || G->getGlobal()->hasProtectedVisibility();
+ }
+ }
+
+ return false;
+}
+
/// isCallCompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
static SDNode *isBLACompatibleAddress(SDOperand Op, SelectionDAG &DAG) {
@@ -1735,6 +1920,102 @@ static SDNode *isBLACompatibleAddress(SDOperand Op, SelectionDAG &DAG) {
DAG.getTargetLoweringInfo().getPointerTy()).Val;
}
+struct TailCallArgumentInfo {
+ SDOperand Arg; // Argument value to be stored.
+ SDOperand FrameIdxOp; // Frame index operand addressing the argument's slot.
+ int FrameIdx; // Index of the fixed frame object used for the slot.
+
+ TailCallArgumentInfo() : FrameIdx(0) {}
+};
+
+/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slots.
+static void
+StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
+ SDOperand Chain,
+ const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
+ SmallVector<SDOperand, 8> &MemOpChains) {
+ for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
+ SDOperand Arg = TailCallArgs[i].Arg;
+ SDOperand FIN = TailCallArgs[i].FrameIdxOp;
+ int FI = TailCallArgs[i].FrameIdx;
+ // Store to the fixed stack slot addressed by the frame index operand.
+ MemOpChains.push_back(DAG.getStore(Chain, Arg, FIN,
+ PseudoSourceValue::getFixedStack(),
+ FI));
+ }
+}
+
+/// EmitTailCallStoreFPAndRetAddr - Store the old frame pointer and return address
+/// to their new stack slots (offset by SPDiff); a no-op when SPDiff is zero.
+static SDOperand EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
+ MachineFunction &MF,
+ SDOperand Chain,
+ SDOperand OldRetAddr,
+ SDOperand OldFP,
+ int SPDiff,
+ bool isPPC64,
+ bool isMachoABI) {
+ if (SPDiff) {
+ // Calculate the new stack slot for the return address.
+ int SlotSize = isPPC64 ? 8 : 4;
+ int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64,
+ isMachoABI);
+ int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
+ NewRetAddrLoc);
+ int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64,
+ isMachoABI);
+ int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc);
+
+ MVT::ValueType VT = isPPC64 ? MVT::i64 : MVT::i32;
+ SDOperand NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
+ Chain = DAG.getStore(Chain, OldRetAddr, NewRetAddrFrIdx,
+ PseudoSourceValue::getFixedStack(), NewRetAddr);
+ SDOperand NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
+ Chain = DAG.getStore(Chain, OldFP, NewFramePtrIdx,
+ PseudoSourceValue::getFixedStack(), NewFPIdx);
+ }
+ return Chain;
+}
+
+/// CalculateTailCallArgDest - Remember the argument for later processing and
+/// calculate its position (ArgOffset + SPDiff) as a fixed frame object.
+static void
+CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
+ SDOperand Arg, int SPDiff, unsigned ArgOffset,
+ SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
+ int Offset = ArgOffset + SPDiff;
+ uint32_t OpSize = (MVT::getSizeInBits(Arg.getValueType())+7)/8;
+ int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
+ MVT::ValueType VT = isPPC64 ? MVT::i64 : MVT::i32;
+ SDOperand FIN = DAG.getFrameIndex(FI, VT);
+ TailCallArgumentInfo Info;
+ Info.Arg = Arg;
+ Info.FrameIdxOp = FIN;
+ Info.FrameIdx = FI;
+ TailCallArguments.push_back(Info);
+}
+
+/// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
+/// address stack slots. Returns the chain as result and the loaded values in
+/// LROpOut/FPOpOut (left untouched if SPDiff is zero). Used when tail calling.
+SDOperand PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
+ int SPDiff,
+ SDOperand Chain,
+ SDOperand &LROpOut,
+ SDOperand &FPOpOut) {
+ if (SPDiff) {
+ // Load the LR and FP stack slot for later adjusting.
+ MVT::ValueType VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
+ LROpOut = getReturnAddrFrameIndex(DAG);
+ LROpOut = DAG.getLoad(VT, Chain, LROpOut, NULL, 0);
+ Chain = SDOperand(LROpOut.Val, 1);
+ FPOpOut = getFramePointerFrameIndex(DAG);
+ FPOpOut = DAG.getLoad(VT, Chain, FPOpOut, NULL, 0);
+ Chain = SDOperand(FPOpOut.Val, 1);
+ }
+ return Chain;
+}
+
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
@@ -1750,11 +2031,39 @@ CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain,
NULL, 0, NULL, 0);
}
+/// LowerMemOpCallTo - Store the argument to the stack (normal call) or remember
+/// it in TailCallArguments so it can be stored later (tail call).
+static void
+LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDOperand Chain,
+ SDOperand Arg, SDOperand PtrOff, int SPDiff,
+ unsigned ArgOffset, bool isPPC64, bool isTailCall,
+ bool isVector, SmallVector<SDOperand, 8> &MemOpChains,
+ SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
+ MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ if (!isTailCall) {
+ if (isVector) {
+ SDOperand StackPtr;
+ if (isPPC64)
+ StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+ else
+ StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+ PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr,
+ DAG.getConstant(ArgOffset, PtrVT));
+ }
+ MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
+ // Tail call: calculate and remember the argument location instead.
+ } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
+ TailCallArguments);
+}
+
SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget,
TargetMachine &TM) {
SDOperand Chain = Op.getOperand(0);
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+ unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+ bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0 &&
+ CC == CallingConv::Fast && PerformTailCallOpt;
SDOperand Callee = Op.getOperand(4);
unsigned NumOps = (Op.getNumOperands() - 5) / 2;
@@ -1765,58 +2074,32 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
bool isPPC64 = PtrVT == MVT::i64;
unsigned PtrByteSize = isPPC64 ? 8 : 4;
+ MachineFunction &MF = DAG.getMachineFunction();
+
// args_to_use will accumulate outgoing args for the PPCISD::CALL case in
// SelectExpr to use to put the arguments in the appropriate registers.
std::vector<SDOperand> args_to_use;
+ // Mark this function as potentially containing a function that contains a
+ // tail call. As a consequence the frame pointer will be used for dynamicalloc
+ // and restoring the callers stack pointer in this functions epilog. This is
+ // done because by tail calling the called function might overwrite the value
+ // in this function's (MF) stack pointer stack slot 0(SP).
+ if (PerformTailCallOpt && CC==CallingConv::Fast)
+ MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+ unsigned nAltivecParamsAtEnd = 0;
+
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. We start with 24/48 bytes, which is
// prereserved space for [SP][CR][LR][3 x unused].
- unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
-
- // Add up all the space actually used.
- // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
- // they all go in registers, but we must reserve stack space for them for
- // possible use by the caller. In varargs or 64-bit calls, parameters are
- // assigned stack space in order, with padding so Altivec parameters are
- // 16-byte aligned.
- unsigned nAltivecParamsAtEnd = 0;
- for (unsigned i = 0; i != NumOps; ++i) {
- SDOperand Arg = Op.getOperand(5+2*i);
- MVT::ValueType ArgVT = Arg.getValueType();
- if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
- ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
- if (!isVarArg && !isPPC64) {
- // Non-varargs Altivec parameters go after all the non-Altivec parameters;
- // do those last so we know how much padding we need.
- nAltivecParamsAtEnd++;
- continue;
- } else {
- // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
- NumBytes = ((NumBytes+15)/16)*16;
- }
- }
- ISD::ArgFlagsTy Flags =
- cast<ARG_FLAGSSDNode>(Op.getOperand(5+2*i+1))->getArgFlags();
- unsigned ArgSize =MVT::getSizeInBits(Op.getOperand(5+2*i).getValueType())/8;
- if (Flags.isByVal())
- ArgSize = Flags.getByValSize();
- ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
- NumBytes += ArgSize;
- }
- // Allow for Altivec parameters at the end, if needed.
- if (nAltivecParamsAtEnd) {
- NumBytes = ((NumBytes+15)/16)*16;
- NumBytes += 16*nAltivecParamsAtEnd;
- }
+ unsigned NumBytes =
+ CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isMachoABI, isVarArg, CC,
+ Op, nAltivecParamsAtEnd);
- // The prolog code of the callee may store up to 8 GPR argument registers to
- // the stack, allowing va_start to index over them in memory if its varargs.
- // Because we cannot tell if this is needed on the caller side, we have to
- // conservatively assume that it is needed. As such, make sure we have at
- // least enough stack space for the caller to store the 8 GPRs.
- NumBytes = std::max(NumBytes,
- PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
+ // Calculate by how many bytes the stack has to be adjusted in case of tail
+ // call optimization.
+ int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
@@ -1824,6 +2107,11 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
DAG.getConstant(NumBytes, PtrVT));
SDOperand CallSeqStart = Chain;
+ // Load the return address and frame pointer so it can be move somewhere else
+ // later.
+ SDOperand LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp);
+
// Set up a copy of the stack pointer for use loading and storing any
// arguments that may not fit in the registers available for argument
// passing.
@@ -1861,6 +2149,8 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+
SmallVector<SDOperand, 8> MemOpChains;
for (unsigned i = 0; i != NumOps; ++i) {
bool inMem = false;
@@ -1959,7 +2249,9 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
if (GPR_idx != NumGPRs) {
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
} else {
- MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, false, MemOpChains,
+ TailCallArguments);
inMem = true;
}
if (inMem || isMachoABI) {
@@ -2007,7 +2299,9 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
}
}
} else {
- MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, false, MemOpChains,
+ TailCallArguments);
inMem = true;
}
if (inMem || isMachoABI) {
@@ -2058,6 +2352,7 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
}
break;
}
+
// Non-varargs Altivec params generally go in registers, but have
// stack space allocated at the end.
if (VR_idx != NumVRs) {
@@ -2065,10 +2360,9 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
} else if (nAltivecParamsAtEnd==0) {
// We are emitting Altivec params in order.
- PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr,
- DAG.getConstant(ArgOffset, PtrVT));
- SDOperand Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
- MemOpChains.push_back(Store);
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, true, MemOpChains,
+ TailCallArguments);
ArgOffset += 16;
}
break;
@@ -2090,10 +2384,11 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
if (++j > NumVRs) {
- SDOperand PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr,
- DAG.getConstant(ArgOffset, PtrVT));
- SDOperand Store = D