diff options
author | Jeffrey Yasskin <jyasskin@google.com> | 2009-11-16 22:41:33 +0000 |
---|---|---|
committer | Jeffrey Yasskin <jyasskin@google.com> | 2009-11-16 22:41:33 +0000 |
commit | d1ba06bf131a9d217426529d2e28af1f2eeed47a (patch) | |
tree | 72ae565430358edb4e81b988c8725938c1f60763 /lib/Target/X86 | |
parent | da589a3a963e6cc179d850c5fd395d3e10ce741c (diff) |
Make X86-64 in the Large model always emit 64-bit calls.
The large code model, documented at
http://www.x86-64.org/documentation/abi.pdf, says that calls should
assume their target may lie outside the range of the 32-bit pc-relative
offset that fits in the call instruction.
To do this, we turn off the global-address->target-global-address
conversion in X86TargetLowering::LowerCall(). The first attempt at
this broke the lazy JIT because it can separate the movabs(imm->reg)
from the actual call instruction. The lazy JIT receives the address of
the movabs as a relocation and needs to record the return address from
the call; then, when that call happens, it needs to patch the
movabs with the newly-compiled target. We could thread the call
instruction into the relocation and record the movabs<->call mapping
explicitly, but that seems to require at least as much new
complication in the code generator as this change.
To fix this, we make lazy functions _always_ go through a call
stub. You'd think we'd only have to force lazy calls through a stub on
difficult platforms, but that turns out to break indirect calls
through a function pointer. The right fix for that is to distinguish
between calls and address-of operations on uncompiled functions, but
that's complex enough to leave for someone else to do.
Another attempt at this defined a new CALL64i pseudo-instruction,
which expanded to a 2-instruction sequence in the assembly output and
was special-cased in the X86CodeEmitter's emitInstruction()
function. That broke indirect calls in the same way as above.
This patch also removes a hack forcing Darwin to the small code model.
Without far-call-stubs, the small code model requires things of the
JITMemoryManager that the DefaultJITMemoryManager can't provide.
Thanks to echristo for lots of testing!
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@88984 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/X86CodeEmitter.cpp | 27 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 21 | ||||
-rw-r--r-- | lib/Target/X86/X86JITInfo.cpp | 5 | ||||
-rw-r--r-- | lib/Target/X86/X86TargetMachine.cpp | 16 |
4 files changed, 30 insertions, 39 deletions
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index ab49b9eba3..4497931e86 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -82,7 +82,7 @@ namespace { void emitPCRelativeBlockAddress(MachineBasicBlock *MBB); void emitGlobalAddress(GlobalValue *GV, unsigned Reloc, intptr_t Disp = 0, intptr_t PCAdj = 0, - bool MayNeedFarStub = false, bool Indirect = false); + bool Indirect = false); void emitExternalSymbolAddress(const char *ES, unsigned Reloc); void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0, intptr_t PCAdj = 0); @@ -176,7 +176,6 @@ template<class CodeEmitter> void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, intptr_t Disp /* = 0 */, intptr_t PCAdj /* = 0 */, - bool MayNeedFarStub /* = false */, bool Indirect /* = false */) { intptr_t RelocCST = Disp; if (Reloc == X86::reloc_picrel_word) @@ -185,9 +184,9 @@ void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, RelocCST = PCAdj; MachineRelocation MR = Indirect ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, - GV, RelocCST, MayNeedFarStub) + GV, RelocCST, false) : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - GV, RelocCST, MayNeedFarStub); + GV, RelocCST, false); MCE.addRelocation(MR); // The relocated value will be added to the displacement if (Reloc == X86::reloc_absolute_dword) @@ -333,10 +332,9 @@ void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp, // do it, otherwise fallback to absolute (this is determined by IsPCRel). 
// 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute - bool MayNeedFarStub = isa<Function>(RelocOp->getGlobal()); bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM); emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(), - Adj, MayNeedFarStub, Indirect); + Adj, Indirect); } else if (RelocOp->isSymbol()) { emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType); } else if (RelocOp->isCPI()) { @@ -633,14 +631,8 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, } if (MO.isGlobal()) { - // Assume undefined functions may be outside the Small codespace. - bool MayNeedFarStub = - (Is64BitMode && - (TM.getCodeModel() == CodeModel::Large || - TM.getSubtarget<X86Subtarget>().isTargetDarwin())) || - Opcode == X86::TAILJMPd; emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word, - MO.getOffset(), 0, MayNeedFarStub); + MO.getOffset(), 0); break; } @@ -681,10 +673,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, if (Opcode == X86::MOV64ri) rt = X86::reloc_absolute_dword; // FIXME: add X86II flag? if (MO1.isGlobal()) { - bool MayNeedFarStub = isa<Function>(MO1.getGlobal()); bool Indirect = gvNeedsNonLazyPtr(MO1, TM); emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, - MayNeedFarStub, Indirect); + Indirect); } else if (MO1.isSymbol()) emitExternalSymbolAddress(MO1.getSymbolName(), rt); else if (MO1.isCPI()) @@ -790,10 +781,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, if (Opcode == X86::MOV64ri32) rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? 
if (MO1.isGlobal()) { - bool MayNeedFarStub = isa<Function>(MO1.getGlobal()); bool Indirect = gvNeedsNonLazyPtr(MO1, TM); emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, - MayNeedFarStub, Indirect); + Indirect); } else if (MO1.isSymbol()) emitExternalSymbolAddress(MO1.getSymbolName(), rt); else if (MO1.isCPI()) @@ -831,10 +821,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, if (Opcode == X86::MOV64mi32) rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? if (MO.isGlobal()) { - bool MayNeedFarStub = isa<Function>(MO.getGlobal()); bool Indirect = gvNeedsNonLazyPtr(MO, TM); emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0, - MayNeedFarStub, Indirect); + Indirect); } else if (MO.isSymbol()) emitExternalSymbolAddress(MO.getSymbolName(), rt); else if (MO.isCPI()) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index dacb2c3b25..6018cf5ef7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1937,9 +1937,19 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, FPDiff, dl); } - // If the callee is a GlobalAddress node (quite common, every direct call is) - // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + bool WasGlobalOrExternal = false; + if (getTargetMachine().getCodeModel() == CodeModel::Large) { + assert(Is64Bit && "Large code model is only legal in 64-bit mode."); + // In the 64-bit large code model, we have to make all calls + // through a register, since the call instruction's 32-bit + // pc-relative offset may not be large enough to hold the whole + // address. + } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + WasGlobalOrExternal = true; + // If the callee is a GlobalAddress node (quite common, every direct call + // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack + // it. 
+ // We should use extra load for direct calls to dllimported functions in // non-JIT mode. GlobalValue *GV = G->getGlobal(); @@ -1967,6 +1977,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, G->getOffset(), OpFlags); } } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { + WasGlobalOrExternal = true; unsigned char OpFlags = 0; // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external @@ -1984,7 +1995,9 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), OpFlags); - } else if (isTailCall) { + } + + if (isTailCall && !WasGlobalOrExternal) { unsigned Opc = Is64Bit ? X86::R11 : X86::EAX; Chain = DAG.getCopyToReg(Chain, dl, diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 62ca47ff78..0792bdd4dd 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -367,8 +367,9 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { // Rewrite the call target... so that we don't end up here every time we // execute the call. #if defined (X86_64_JIT) - if (!isStub) - *(intptr_t *)(RetAddr - 0xa) = NewVal; + assert(isStub && + "X86-64 doesn't support rewriting non-stub lazy compilation calls:" + " the call instruction varies too much."); #else *(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4); #endif diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 5d58a8779f..0cda8bc408 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -185,14 +185,8 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, } // 64-bit JIT places everything in the same buffer except external functions. - // On Darwin, use small code model but hack the call instruction for - // externals. Elsewhere, do not assume globals are in the lower 4G. 
- if (Subtarget.is64Bit()) { - if (Subtarget.isTargetDarwin()) - setCodeModel(CodeModel::Small); - else + if (Subtarget.is64Bit()) setCodeModel(CodeModel::Large); - } PM.add(createX86CodeEmitterPass(*this, MCE)); @@ -211,14 +205,8 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, } // 64-bit JIT places everything in the same buffer except external functions. - // On Darwin, use small code model but hack the call instruction for - // externals. Elsewhere, do not assume globals are in the lower 4G. - if (Subtarget.is64Bit()) { - if (Subtarget.isTargetDarwin()) - setCodeModel(CodeModel::Small); - else + if (Subtarget.is64Bit()) setCodeModel(CodeModel::Large); - } PM.add(createX86JITCodeEmitterPass(*this, JCE)); |