diff options
author | Evan Cheng <evan.cheng@apple.com> | 2006-09-08 06:48:29 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2006-09-08 06:48:29 +0000 |
commit | 25ab690a43cbbb591b76d49e3595b019c32f4b3f (patch) | |
tree | fe952a3e394b9f01b6ce8ed8691cee8c507ed094 | |
parent | 1e5fb6928c510bc945dbcd23d99022288ad7e863 (diff) |
Committing X86-64 support.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30177 91177308-0d34-0410-b5e6-96231b3b80d8
25 files changed, 3603 insertions, 462 deletions
diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt new file mode 100644 index 0000000000..af3e273237 --- /dev/null +++ b/lib/Target/X86/README-X86-64.txt @@ -0,0 +1,269 @@ +//===- README_X86_64.txt - Notes for X86-64 code gen ----------------------===// + +Implement different PIC models? Right now we only support Mac OS X with small +PIC code model. + +//===---------------------------------------------------------------------===// + +Make use of "Red Zone". + +//===---------------------------------------------------------------------===// + +Implement __int128 and long double support. + +//===---------------------------------------------------------------------===// + +For this: + +extern void xx(void); +void bar(void) { + xx(); +} + +gcc compiles to: + +.globl _bar +_bar: + jmp _xx + +We need to do the tailcall optimization as well. + +//===---------------------------------------------------------------------===// + +For this: + +int test(int a) +{ + return a * 3; +} + +We generate + leal (%edi,%edi,2), %eax + +We should be generating + leal (%rdi,%rdi,2), %eax + +instead. The latter form does not require an address-size prefix 67H. + +It's probably ok to simply emit the corresponding 64-bit super class registers +in this case? + + +//===---------------------------------------------------------------------===// + +AMD64 Optimization Manual 8.2 has some nice information about optimizing integer +multiplication by a constant. How much of it applies to Intel's X86-64 +implementation? There are definite trade-offs to consider: latency vs. register +pressure vs. code size. + +//===---------------------------------------------------------------------===// + +Are we better off using branches instead of cmove to implement FP to +unsigned i64? 
+ +_conv: + ucomiss LC0(%rip), %xmm0 + cvttss2siq %xmm0, %rdx + jb L3 + subss LC0(%rip), %xmm0 + movabsq $-9223372036854775808, %rax + cvttss2siq %xmm0, %rdx + xorq %rax, %rdx +L3: + movq %rdx, %rax + ret + +instead of + +_conv: + movss LCPI1_0(%rip), %xmm1 + cvttss2siq %xmm0, %rcx + movaps %xmm0, %xmm2 + subss %xmm1, %xmm2 + cvttss2siq %xmm2, %rax + movabsq $-9223372036854775808, %rdx + xorq %rdx, %rax + ucomiss %xmm1, %xmm0 + cmovb %rcx, %rax + ret + +Seems like the jb branch has high likelihood of being taken. It would have +saved a few instructions. + +//===---------------------------------------------------------------------===// + +Poor codegen: + +int X[2]; +int b; +void test(void) { + memset(X, b, 2*sizeof(X[0])); +} + +llc: + movq _b@GOTPCREL(%rip), %rax + movzbq (%rax), %rax + movq %rax, %rcx + shlq $8, %rcx + orq %rax, %rcx + movq %rcx, %rax + shlq $16, %rax + orq %rcx, %rax + movq %rax, %rcx + shlq $32, %rcx + movq _X@GOTPCREL(%rip), %rdx + orq %rax, %rcx + movq %rcx, (%rdx) + ret + +gcc: + movq _b@GOTPCREL(%rip), %rax + movabsq $72340172838076673, %rdx + movzbq (%rax), %rax + imulq %rdx, %rax + movq _X@GOTPCREL(%rip), %rdx + movq %rax, (%rdx) + ret + +//===---------------------------------------------------------------------===// + +Vararg function prologue can be further optimized. Currently all XMM registers +are stored into register save area. Most of them can be eliminated since the +upper bound of the number of XMM registers used is passed in %al. gcc produces +something like the following: + + movzbl %al, %edx + leaq 0(,%rdx,4), %rax + leaq 4+L2(%rip), %rdx + leaq 239(%rsp), %rax + jmp *%rdx + movaps %xmm7, -15(%rax) + movaps %xmm6, -31(%rax) + movaps %xmm5, -47(%rax) + movaps %xmm4, -63(%rax) + movaps %xmm3, -79(%rax) + movaps %xmm2, -95(%rax) + movaps %xmm1, -111(%rax) + movaps %xmm0, -127(%rax) +L2: + +It jumps over the movaps that do not need to be stored. 
Hard to see this being +significant as it added 5 instructions (including an indirect branch) to avoid +executing 0 to 8 stores in the function prologue. + +Perhaps we can optimize for the common case where no XMM registers are used for +parameter passing. i.e. if %al == 0 jump over all stores. Or in the case of a +leaf function where we can determine that no XMM input parameter is needed, avoid +emitting the stores at all. + +//===---------------------------------------------------------------------===// + +AMD64 has a complex calling convention for aggregate passing by value: + +1. If the size of an object is larger than two eightbytes, or in C++, is a non- + POD structure or union type, or contains unaligned fields, it has class + MEMORY. +2. Both eightbytes get initialized to class NO_CLASS. +3. Each field of an object is classified recursively so that always two fields + are considered. The resulting class is calculated according to the classes + of the fields in the eightbyte: + (a) If both classes are equal, this is the resulting class. + (b) If one of the classes is NO_CLASS, the resulting class is the other + class. + (c) If one of the classes is MEMORY, the result is the MEMORY class. + (d) If one of the classes is INTEGER, the result is the INTEGER. + (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, MEMORY is used as + class. + (f) Otherwise class SSE is used. +4. Then a post merger cleanup is done: + (a) If one of the classes is MEMORY, the whole argument is passed in memory. + (b) If SSEUP is not preceded by SSE, it is converted to SSE. + +Currently llvm frontend does not handle this correctly. + +Problem 1: + typedef struct { int i; double d; } QuadWordS; +It is currently passed in two i64 integer registers. However, gcc compiled +callee expects the second element 'd' to be passed in XMM0. 
+ +Problem 2: + typedef struct { int32_t i; float j; double d; } QuadWordS; +The size of the first two fields == i64 so they will be combined and passed in +an integer register RDI. The third field is still passed in XMM0. + +Problem 3: + typedef struct { int64_t i; int8_t j; int64_t d; } S; + void test(S s) +The size of this aggregate is greater than two i64 so it should be passed in +memory. Currently llvm breaks this down and passes it in three integer +registers. + +Problem 4: +Taking problem 3 one step ahead where a function expects an aggregate value +in memory followed by more parameter(s) passed in register(s). + void test(S s, int b) + +LLVM IR does not allow parameter passing by aggregates, therefore it must break +the aggregate value (in problem 3 and 4) into a number of scalar values: + void %test(long %s.i, byte %s.j, long %s.d); + +However, if the backend were to lower this code literally it would pass the 3 +values in integer registers. To force it to be passed in memory, the frontend +should change the function signature to: + void %test(long %undef1, long %undef2, long %undef3, long %undef4, + long %undef5, long %undef6, + long %s.i, byte %s.j, long %s.d); +And the callee would look something like this: + call void %test( undef, undef, undef, undef, undef, undef, + %tmp.s.i, %tmp.s.j, %tmp.s.d ); +The first 6 undef parameters would exhaust the 6 integer registers used for +parameter passing. The following three integer values would then be forced into +memory. 
+ +For problem 4, the parameter 'd' would be moved to the front of the parameter +list so it will be passed in register: + void %test(int %d, + long %undef1, long %undef2, long %undef3, long %undef4, + long %undef5, long %undef6, + long %s.i, byte %s.j, long %s.d); + +//===---------------------------------------------------------------------===// + +For this: + +extern int dst[]; +extern int* ptr; + +void test(void) { + ptr = dst; +} + +We generate this code for static relocation model: + +_test: + leaq _dst(%rip), %rax + movq %rax, _ptr(%rip) + ret + +If we are in small code model, they we can treat _dst as a 32-bit constant. + movq $_dst, _ptr(%rip) + +Note, however, we should continue to use RIP relative addressing mode as much as +possible. The above is actually one byte shorter than + movq $_dst, _ptr + +//===---------------------------------------------------------------------===// + +Right now the asm printer assumes GlobalAddress are accessed via RIP relative +addressing. Therefore, it is not possible to generate this: + movabsq $__ZTV10polynomialIdE+16, %rax + +That is ok for now since we currently only support small model. So the above +is selected as + leaq __ZTV10polynomialIdE+16(%rip), %rax + +This is probably slightly slower but is much shorter than movabsq. However, if +we were to support medium or larger code models, we need to use the movabs +instruction. We should probably introduce something like AbsoluteAddress to +distinguish it from GlobalAddress so the asm printer and JIT code emitter can +do the right thing. diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index e15512db23..c4b3d8635f 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -20,8 +20,8 @@ include "../Target.td" // X86 Subtarget features. 
// -def Feature64Bit : SubtargetFeature<"64bit", "Is64Bit", "true", - "Enable 64-bit instructions">; +def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true", + "Support 64-bit instructions">; def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX", "Enable MMX instructions">; def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1", @@ -61,6 +61,8 @@ def : Proc<"prescott", [FeatureMMX, FeatureSSE1, FeatureSSE2, FeatureSSE3]>; def : Proc<"nocona", [FeatureMMX, FeatureSSE1, FeatureSSE2, FeatureSSE3, Feature64Bit]>; +def : Proc<"core2", [FeatureMMX, FeatureSSE1, FeatureSSE2, + FeatureSSE3, Feature64Bit]>; def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>; @@ -105,16 +107,20 @@ def X86InstrInfo : InstrInfo { // should be kept up-to-date with the fields in the X86InstrInfo.h file. let TSFlagsFields = ["FormBits", "hasOpSizePrefix", + "hasAdSizePrefix", "Prefix", + "hasREX_WPrefix", "ImmTypeBits", "FPFormBits", "Opcode"]; let TSFlagsShifts = [0, 6, 7, - 11, + 8, + 12, 13, - 16]; + 16, + 24]; } // The X86 target supports two different syntaxes for emitting machine code. diff --git a/lib/Target/X86/X86ATTAsmPrinter.cpp b/lib/Target/X86/X86ATTAsmPrinter.cpp index e3653e4920..b17cde18de 100755 --- a/lib/Target/X86/X86ATTAsmPrinter.cpp +++ b/lib/Target/X86/X86ATTAsmPrinter.cpp @@ -126,8 +126,9 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, O << '%'; unsigned Reg = MO.getReg(); if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) { - MVT::ValueType VT = (strcmp(Modifier,"subreg16") == 0) - ? MVT::i16 : MVT::i8; + MVT::ValueType VT = (strcmp(Modifier+6,"64") == 0) ? + MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 : + ((strcmp(Modifier+6,"16") == 0) ? 
MVT::i16 : MVT::i8)); Reg = getX86SubSuperRegister(Reg, VT); } for (const char *Name = RI.get(Reg).Name; *Name; ++Name) @@ -148,9 +149,11 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, if (!isMemOp) O << '$'; O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << "_" << MO.getJumpTableIndex(); - if (Subtarget->isTargetDarwin() && + if (X86PICStyle == PICStyle::Stub && TM.getRelocationModel() == Reloc::PIC_) O << "-\"L" << getFunctionNumber() << "$pb\""; + if (Subtarget->is64Bit()) + O << "(%rip)"; return; } case MachineOperand::MO_ConstantPoolIndex: { @@ -158,7 +161,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, if (!isMemOp) O << '$'; O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" << MO.getConstantPoolIndex(); - if (Subtarget->isTargetDarwin() && + if (X86PICStyle == PICStyle::Stub && TM.getRelocationModel() == Reloc::PIC_) O << "-\"L" << getFunctionNumber() << "$pb\""; int Offset = MO.getOffset(); @@ -166,47 +169,59 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, O << "+" << Offset; else if (Offset < 0) O << Offset; + + if (Subtarget->is64Bit()) + O << "(%rip)"; return; } case MachineOperand::MO_GlobalAddress: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); bool isMemOp = Modifier && !strcmp(Modifier, "mem"); if (!isMemOp && !isCallOp) O << '$'; - // Darwin block shameless ripped from PPCAsmPrinter.cpp - if (Subtarget->isTargetDarwin() && + + GlobalValue *GV = MO.getGlobal(); + std::string Name = Mang->getValueName(GV); + bool isExt = (GV->isExternal() || GV->hasWeakLinkage() || + GV->hasLinkOnceLinkage()); + if (X86PICStyle == PICStyle::Stub && TM.getRelocationModel() != Reloc::Static) { - GlobalValue *GV = MO.getGlobal(); - std::string Name = Mang->getValueName(GV); // Link-once, External, or Weakly-linked global variables need // non-lazily-resolved stubs - if (GV->isExternal() || GV->hasWeakLinkage() || - 
GV->hasLinkOnceLinkage()) { + if (isExt) { // Dynamically-resolved functions need a stub for the function. - if (isCallOp && isa<Function>(GV) && cast<Function>(GV)->isExternal()) { + if (isCallOp && isa<Function>(GV)) { FnStubs.insert(Name); O << "L" << Name << "$stub"; } else { GVStubs.insert(Name); O << "L" << Name << "$non_lazy_ptr"; } - } else { - O << Mang->getValueName(GV); - } + } else + O << Name; if (!isCallOp && TM.getRelocationModel() == Reloc::PIC_) O << "-\"L" << getFunctionNumber() << "$pb\""; - } else - O << Mang->getValueName(MO.getGlobal()); + } else + O << Name; + int Offset = MO.getOffset(); if (Offset > 0) O << "+" << Offset; else if (Offset < 0) O << Offset; + + if (!isCallOp && + Subtarget->is64Bit()) { + if (isExt && TM.getRelocationModel() != Reloc::Static) + O << "@GOTPCREL"; + O << "(%rip)"; + } + return; } case MachineOperand::MO_ExternalSymbol: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); if (isCallOp && - Subtarget->isTargetDarwin() && + X86PICStyle == PICStyle::Stub && TM.getRelocationModel() != Reloc::Static) { std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName(); @@ -216,6 +231,11 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, } if (!isCallOp) O << '$'; O << TAI->getGlobalPrefix() << MO.getSymbolName(); + + if (!isCallOp && + Subtarget->is64Bit()) + O << "(%rip)"; + return; } default: @@ -238,7 +258,8 @@ void X86ATTAsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op) { } } -void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){ +void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op, + const char *Modifier){ assert(isMem(MI, Op) && "Invalid memory reference!"); const MachineOperand &BaseReg = MI->getOperand(Op); @@ -266,12 +287,13 @@ void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){ if (IndexReg.getReg() || BaseReg.getReg()) { O << "("; - if (BaseReg.getReg()) - printOperand(MI, Op); + if 
(BaseReg.getReg()) { + printOperand(MI, Op, Modifier); + } if (IndexReg.getReg()) { O << ","; - printOperand(MI, Op+2); + printOperand(MI, Op+2, Modifier); if (ScaleVal != 1) O << "," << ScaleVal; } @@ -350,43 +372,25 @@ bool X86ATTAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, /// void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) { ++EmittedInsts; - // This works around some Darwin assembler bugs. - if (Subtarget->isTargetDarwin()) { - switch (MI->getOpcode()) { - case X86::REP_MOVSB: - O << "rep/movsb (%esi),(%edi)\n"; - return; - case X86::REP_MOVSD: - O << "rep/movsl (%esi),(%edi)\n"; - return; - case X86::REP_MOVSW: - O << "rep/movsw (%esi),(%edi)\n"; - return; - case X86::REP_STOSB: - O << "rep/stosb\n"; - return; - case X86::REP_STOSD: - O << "rep/stosl\n"; - return; - case X86::REP_STOSW: - O << "rep/stosw\n"; - return; - default: - break; - } - } // See if a truncate instruction can be turned into a nop. switch (MI->getOpcode()) { default: break; - case X86::TRUNC_GR32_GR16: - case X86::TRUNC_GR32_GR8: - case X86::TRUNC_GR16_GR8: { + case X86::TRUNC_64to32: + case X86::TRUNC_64to16: + case X86::TRUNC_32to16: + case X86::TRUNC_32to8: + case X86::TRUNC_16to8: + case X86::TRUNC_32_to8: + case X86::TRUNC_16_to8: { const MachineOperand &MO0 = MI->getOperand(0); const MachineOperand &MO1 = MI->getOperand(1); unsigned Reg0 = MO0.getReg(); unsigned Reg1 = MO1.getReg(); - if (MI->getOpcode() == X86::TRUNC_GR32_GR16) + unsigned Opc = MI->getOpcode(); + if (Opc == X86::TRUNC_64to32) + Reg1 = getX86SubSuperRegister(Reg1, MVT::i32); + else if (Opc == X86::TRUNC_32to16 || Opc == X86::TRUNC_64to16) Reg1 = getX86SubSuperRegister(Reg1, MVT::i16); else Reg1 = getX86SubSuperRegister(Reg1, MVT::i8); @@ -395,6 +399,9 @@ void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) { O << "\n\t"; break; } + case X86::PsMOVZX64rr32: + O << TAI->getCommentString() << " ZERO-EXTEND " << "\n\t"; + break; } // Call the autogenerated 
instruction printer routines. diff --git a/lib/Target/X86/X86ATTAsmPrinter.h b/lib/Target/X86/X86ATTAsmPrinter.h index ff707caee6..167e812f4d 100755 --- a/lib/Target/X86/X86ATTAsmPrinter.h +++ b/lib/Target/X86/X86ATTAsmPrinter.h @@ -60,6 +60,9 @@ struct X86ATTAsmPrinter : public X86SharedAsmPrinter { void printf128mem(const MachineInstr *MI, unsigned OpNo) { printMemReference(MI, OpNo); } + void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) { + printMemReference(MI, OpNo, "subreg64"); + } bool printAsmMRegister(const MachineOperand &MO, const char Mode); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, @@ -69,7 +72,8 @@ struct X86ATTAsmPrinter : public X86SharedAsmPrinter { void printMachineInstruction(const MachineInstr *MI); void printSSECC(const MachineInstr *MI, unsigned Op); - void printMemReference(const MachineInstr *MI, unsigned Op); + void printMemReference(const MachineInstr *MI, unsigned Op, + const char *Modifier=NULL); void printPICLabel(const MachineInstr *MI, unsigned Op); bool runOnMachineFunction(MachineFunction &F); }; diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 4a54e5914d..b634d13ea4 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -30,8 +30,12 @@ Statistic<> llvm::EmittedInsts("asm-printer", "Number of machine instrs printed"); /// doInitialization -bool X86SharedAsmPrinter::doInitialization(Module &M) { +bool X86SharedAsmPrinter::doInitialization(Module &M) { if (Subtarget->isTargetDarwin()) { + const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); + if (!Subtarget->is64Bit()) + X86PICStyle = PICStyle::Stub; + // Emit initial debug information. 
DW.BeginModule(&M); } diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index 8d32f59d8b..6db9e45dc3 100755 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -29,12 +29,19 @@ namespace llvm { extern Statistic<> EmittedInsts; +// FIXME: Move this to CodeGen/AsmPrinter.h +namespace PICStyle { + enum X86AsmPICStyle { + Stub, GOT + }; +} + struct VISIBILITY_HIDDEN X86SharedAsmPrinter : public AsmPrinter { DwarfWriter DW; X86SharedAsmPrinter(std::ostream &O, X86TargetMachine &TM, const TargetAsmInfo *T) - : AsmPrinter(O, TM, T), DW(O, this, T) { + : AsmPrinter(O, TM, T), DW(O, this, T), X86PICStyle(PICStyle::GOT) { Subtarget = &TM.getSubtarget<X86Subtarget>(); } @@ -49,6 +56,8 @@ struct VISIBILITY_HIDDEN X86SharedAsmPrinter : public AsmPrinter { MachineFunctionPass::getAnalysisUsage(AU); } + PICStyle::X86AsmPICStyle X86PICStyle; + const X86Subtarget *Subtarget; // Necessary for Darwin to print out the apprioriate types of linker stubs diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index f7d53caed4..0ac8bc5f32 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -12,6 +12,8 @@ // //===----------------------------------------------------------------------===// +#include "X86InstrInfo.h" +#include "X86Subtarget.h" #include "X86TargetMachine.h" #include "X86Relocations.h" #include "X86.h" @@ -35,14 +37,16 @@ namespace { namespace { class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass { const X86InstrInfo *II; - TargetMachine &TM; + const TargetData *TD; + TargetMachine &TM; MachineCodeEmitter &MCE; + bool Is64BitMode; public: explicit Emitter(TargetMachine &tm, MachineCodeEmitter &mce) - : II(0), TM(tm), MCE(mce) {} + : II(0), TD(0), TM(tm), MCE(mce), Is64BitMode(false) {} Emitter(TargetMachine &tm, MachineCodeEmitter &mce, - const X86InstrInfo& ii) - : II(&ii), TM(tm), MCE(mce) {} + const X86InstrInfo &ii, const TargetData &td, bool is64) + 
: II(&ii), TD(&td), TM(tm), MCE(mce), Is64BitMode(is64) {} bool runOnMachineFunction(MachineFunction &MF); @@ -54,20 +58,29 @@ namespace { private: void emitPCRelativeBlockAddress(MachineBasicBlock *MBB); - void emitPCRelativeValue(unsigned Address); - void emitGlobalAddressForCall(GlobalValue *GV, bool isTailCall); - void emitGlobalAddressForPtr(GlobalValue *GV, int Disp = 0); + void emitPCRelativeValue(intptr_t Address); + void emitGlobalAddressForCall(GlobalValue *GV, bool DoesntNeedStub); + void emitGlobalAddressForPtr(GlobalValue *GV, bool isPCRelative, + int Disp = 0, unsigned PCAdj = 0); void emitExternalSymbolAddress(const char *ES, bool isPCRelative); + void emitPCRelativeConstPoolAddress(unsigned CPI, int Disp = 0, + unsigned PCAdj = 0); + void emitPCRelativeJumpTableAddress(unsigned JTI, unsigned PCAdj = 0); - void emitDisplacementField(const MachineOperand *RelocOp, int DispVal); + void emitDisplacementField(const MachineOperand *RelocOp, int DispVal, + unsigned PCAdj = 0); void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField); void emitSIBByte(unsigned SS, unsigned Index, unsigned Base); - void emitConstant(unsigned Val, unsigned Size); + void emitConstant(uint64_t Val, unsigned Size); void emitMemModRMByte(const MachineInstr &MI, - unsigned Op, unsigned RegOpcodeField); + unsigned Op, unsigned RegOpcodeField, + unsigned PCAdj = 0); + unsigned getX86RegNum(unsigned RegNo); + bool isX86_64ExtendedReg(const MachineOperand &MO); + unsigned determineREX(const MachineInstr &MI); }; } @@ -83,6 +96,9 @@ bool Emitter::runOnMachineFunction(MachineFunction &MF) { MF.getTarget().getRelocationModel() != Reloc::Static) && "JIT relocation model must be set to static or default!"); II = ((X86TargetMachine&)MF.getTarget()).getInstrInfo(); + TD = ((X86TargetMachine&)MF.getTarget()).getTargetData(); + Is64BitMode = + ((X86TargetMachine&)MF.getTarget()).getSubtarget<X86Subtarget>().is64Bit(); do { MCE.startFunction(MF); @@ -98,9 +114,9 @@ bool 
Emitter::runOnMachineFunction(MachineFunction &MF) { return false; } -/// emitPCRelativeValue - Emit a 32-bit PC relative address. +/// emitPCRelativeValue - Emit a PC relative address. /// -void Emitter::emitPCRelativeValue(unsigned Address) { +void Emitter::emitPCRelativeValue(intptr_t Address) { MCE.emitWordLE(Address-MCE.getCurrentPCValue()-4); } @@ -119,20 +135,22 @@ void Emitter::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) { /// emitGlobalAddressForCall - Emit the specified address to the code stream /// assuming this is part of a function call, which is PC relative. /// -void Emitter::emitGlobalAddressForCall(GlobalValue *GV, bool isTailCall) { +void Emitter::emitGlobalAddressForCall(GlobalValue *GV, bool DoesntNeedStub) { MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), X86::reloc_pcrel_word, GV, 0, - !isTailCall /*Doesn'tNeedStub*/)); + DoesntNeedStub)); MCE.emitWordLE(0); } /// emitGlobalAddress - Emit the specified address to the code stream assuming -/// this is part of a "take the address of a global" instruction, which is not -/// PC relative. +/// this is part of a "take the address of a global" instruction. /// -void Emitter::emitGlobalAddressForPtr(GlobalValue *GV, int Disp /* = 0 */) { - MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), - X86::reloc_absolute_word, GV)); +void Emitter::emitGlobalAddressForPtr(GlobalValue *GV, bool isPCRelative, + int Disp /* = 0 */, + unsigned PCAdj /* = 0 */) { + unsigned rt = isPCRelative ? 
X86::reloc_pcrel_word : X86::reloc_absolute_word; + MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), rt, + GV, PCAdj)); MCE.emitWordLE(Disp); // The relocated value will be added to the displacement } @@ -145,6 +163,26 @@ void Emitter::emitExternalSymbolAddress(const char *ES, bool isPCRelative) { MCE.emitWordLE(0); } +/// emitPCRelativeConstPoolAddress - Arrange for the address of an constant pool +/// to be emitted to the current location in the function, and allow it to be PC +/// relative. +void Emitter::emitPCRelativeConstPoolAddress(unsigned CPI, int Disp /* = 0 */, + unsigned PCAdj /* = 0 */) { + MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), + X86::reloc_pcrel_word, CPI, PCAdj)); + MCE.emitWordLE(Disp); // The relocated value will be added to the displacement +} + +/// emitPCRelativeJumpTableAddress - Arrange for the address of a jump table to +/// be emitted to the current location in the function, and allow it to be PC +/// relative. +void Emitter::emitPCRelativeJumpTableAddress(unsigned JTI, + unsigned PCAdj /* = 0 */) { + MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), + X86::reloc_pcrel_word, JTI, PCAdj)); + MCE.emitWordLE(0); // The relocated value will be added to the displacement +} + /// N86 namespace - Native X86 Register numbers... used by X86 backend. /// namespace N86 { @@ -153,28 +191,53 @@ namespace N86 { }; } - // getX86RegNum - This function maps LLVM register identifiers to their X86 // specific numbering, which is used in various places encoding instructions. 
// -static unsigned getX86RegNum(unsigned RegNo) { +unsigned Emitter::getX86RegNum(unsigned RegNo) { switch(RegNo) { - case X86::EAX: case X86::AX: case X86::AL: return N86::EAX; - case X86::ECX: case X86::CX: case X86::CL: return N86::ECX; - case X86::EDX: case X86::DX: case X86::DL: return N86::EDX; - case X86::EBX: case X86::BX: case X86::BL: return N86::EBX; - case X86::ESP: case X86::SP: case X86::AH: return N86::ESP; - case X86::EBP: case X86::BP: case X86::CH: return N86::EBP; - case X86::ESI: case X86::SI: case X86::DH: return N86::ESI; - case X86::EDI: case X86::DI: case X86::BH: return N86::EDI; + case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX; + case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX; + case X86::RDX: case X86::EDX: case X86::DX: cas |