aboutsummaryrefslogtreecommitdiff
path: root/lib/Target
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2010-11-30 23:55:39 +0000
committerEvan Cheng <evan.cheng@apple.com>2010-11-30 23:55:39 +0000
commit3d2125c9dbac695c93f42c0f59fd040e413fd711 (patch)
tree1f7e426fb325a457b03fafbc84f4f6446af3c270 /lib/Target
parent3f8c110dc643847363686d543a56c23c41353ab0 (diff)
Enable sibling call optimization of libcalls which are expanded during
legalization time. Since at legalization time there is no mapping from SDNode back to the corresponding LLVM instruction and the return SDNode is target specific, this requires a target hook to check for eligibility. Only x86 and ARM support this form of sibcall optimization right now. rdar://8707777 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@120501 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/ARM/ARMFrameInfo.cpp23
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp73
-rw-r--r--lib/Target/ARM/ARMISelLowering.h2
-rw-r--r--lib/Target/X86/README.txt18
-rw-r--r--lib/Target/X86/X86FrameInfo.cpp15
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp41
-rw-r--r--lib/Target/X86/X86ISelLowering.h2
7 files changed, 112 insertions, 62 deletions
diff --git a/lib/Target/ARM/ARMFrameInfo.cpp b/lib/Target/ARM/ARMFrameInfo.cpp
index e2531d04d0..58a7c61d90 100644
--- a/lib/Target/ARM/ARMFrameInfo.cpp
+++ b/lib/Target/ARM/ARMFrameInfo.cpp
@@ -374,16 +374,19 @@ void ARMFrameInfo::emitEpilogue(MachineFunction &MF,
MachineOperand &JumpTarget = MBBI->getOperand(0);
// Jump to label or value in register.
- if (RetOpcode == ARM::TCRETURNdi) {
- BuildMI(MBB, MBBI, dl,
- TII.get(STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
- } else if (RetOpcode == ARM::TCRETURNdiND) {
- BuildMI(MBB, MBBI, dl,
- TII.get(STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
+ if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) {
+ unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi)
+ ? (STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)
+ : (STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
} else if (RetOpcode == ARM::TCRETURNri) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)).
addReg(JumpTarget.getReg(), RegState::Kill);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index da274de5a0..3c90704b7e 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1519,30 +1519,13 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// whether LR is going to be used. Probably the right approach is to
// generate the tail call here and turn it back into CALL/RET in
// emitEpilogue if LR is used.
- if (Subtarget->isThumb1Only())
- return false;
-
- // For the moment, we can only do this to functions defined in this
- // compilation, or to indirect calls. A Thumb B to an ARM function,
- // or vice versa, is not easily fixed up in the linker unlike BL.
- // (We could do this by loading the address of the callee into a register;
- // that is an extra instruction over the direct call and burns a register
- // as well, so is not likely to be a win.)
-
- // It might be safe to remove this restriction on non-Darwin.
// Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
// but we need to make sure there are enough registers; the only valid
// registers are the 4 used for parameters. We don't currently do this
// case.
- if (isa<ExternalSymbolSDNode>(Callee))
- return false;
-
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- const GlobalValue *GV = G->getGlobal();
- if (GV->isDeclaration() || GV->isWeakForLinker())
- return false;
- }
+ if (Subtarget->isThumb1Only())
+ return false;
// If the calling conventions do not match, then we'd better make sure the
// results are returned in the same way as what the caller expects.
@@ -1720,6 +1703,58 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
return result;
}
+bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
+ if (N->getNumValues() != 1)
+ return false;
+ if (!N->hasNUsesOfValue(1, 0))
+ return false;
+
+ unsigned NumCopies = 0;
+ SDNode* Copies[2];
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::CopyToReg) {
+ Copies[NumCopies++] = Use;
+ } else if (Use->getOpcode() == ARMISD::VMOVRRD) {
+ // f64 returned in a pair of GPRs.
+ for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != ISD::CopyToReg)
+ return false;
+ Copies[UI.getUse().getResNo()] = *UI;
+ ++NumCopies;
+ }
+ } else if (Use->getOpcode() == ISD::BITCAST) {
+ // f32 returned in a single GPR.
+ if (!Use->hasNUsesOfValue(1, 0))
+ return false;
+ Use = *Use->use_begin();
+ if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0))
+ return false;
+ Copies[NumCopies++] = Use;
+ } else {
+ return false;
+ }
+
+ if (NumCopies != 1 && NumCopies != 2)
+ return false;
+ for (unsigned i = 0; i < NumCopies; ++i) {
+ SDNode *Copy = Copies[i];
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ SDNode *Use = *UI;
+ if (Use == Copies[0] || Use == Copies[1])
+ continue;
+ return false;
+ }
+ if (UI->getOpcode() != ARMISD::RET_FLAG)
+ return false;
+ }
+ }
+
+ return true;
+}
+
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 38f7399110..6d11bad357 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -421,6 +421,8 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
+ virtual bool isUsedByReturnOnly(SDNode *N) const;
+
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
SDValue getVFPCmp(SDValue LHS, SDValue RHS,
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index e67fab1090..a305ae6ec5 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -895,24 +895,6 @@ compare:
//===---------------------------------------------------------------------===//
-Linux is missing some basic tail call support:
-
-#include <math.h>
-double foo(double a) { return sin(a); }
-
-This compiles into this on x86-64 Linux (but not darwin):
-foo:
- subq $8, %rsp
- call sin
- addq $8, %rsp
- ret
-vs:
-
-foo:
- jmp sin
-
-//===---------------------------------------------------------------------===//
-
Tail call optimization improvements: Tail call optimization currently
pushes all arguments on the top of the stack (their normal place for
non-tail call optimized calls) that source from the callers arguments
diff --git a/lib/Target/X86/X86FrameInfo.cpp b/lib/Target/X86/X86FrameInfo.cpp
index 494e905028..c47b0fa697 100644
--- a/lib/Target/X86/X86FrameInfo.cpp
+++ b/lib/Target/X86/X86FrameInfo.cpp
@@ -712,10 +712,17 @@ void X86FrameInfo::emitEpilogue(MachineFunction &MF,
// Jump to label or value in register.
if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
- BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
- ? X86::TAILJMPd : X86::TAILJMPd64)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
+ ? X86::TAILJMPd : X86::TAILJMPd64));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
} else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6d6d83d557..bc6d84eeec 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1338,6 +1338,23 @@ X86TargetLowering::LowerReturn(SDValue Chain,
MVT::Other, &RetOps[0], RetOps.size());
}
+bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const {
+ if (N->getNumValues() != 1)
+ return false;
+ if (!N->hasNUsesOfValue(1, 0))
+ return false;
+
+ SDNode *Copy = *N->use_begin();
+ if (Copy->getOpcode() != ISD::CopyToReg)
+ return false;
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI)
+ if (UI->getOpcode() != X86ISD::RET_FLAG)
+ return false;
+
+ return true;
+}
+
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
///
@@ -2142,17 +2159,19 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned char OpFlags = 0;
- // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
- // symbols should go through the PLT.
- if (Subtarget->isTargetELF() &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_) {
- OpFlags = X86II::MO_PLT;
- } else if (Subtarget->isPICStyleStubAny() &&
- Subtarget->getDarwinVers() < 9) {
- // PC-relative references to external symbols should go through $stub,
- // unless we're building with the leopard linker or later, which
- // automatically synthesizes these stubs.
- OpFlags = X86II::MO_DARWIN_STUB;
+ if (!isTailCall) {
+ // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
+ // external symbols should go through the PLT.
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ OpFlags = X86II::MO_PLT;
+ } else if (Subtarget->isPICStyleStubAny() &&
+ Subtarget->getDarwinVers() < 9) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB;
+ }
}
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index b521702013..edcdceb39a 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -805,6 +805,8 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
+ virtual bool isUsedByReturnOnly(SDNode *N) const;
+
virtual bool
CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,