diff options
author | Evan Cheng <evan.cheng@apple.com> | 2006-09-08 06:48:29 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2006-09-08 06:48:29 +0000 |
commit | 25ab690a43cbbb591b76d49e3595b019c32f4b3f (patch) | |
tree | fe952a3e394b9f01b6ce8ed8691cee8c507ed094 | |
parent | 1e5fb6928c510bc945dbcd23d99022288ad7e863 (diff) |
Committing X86-64 support.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30177 91177308-0d34-0410-b5e6-96231b3b80d8
25 files changed, 3603 insertions, 462 deletions
diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt new file mode 100644 index 0000000000..af3e273237 --- /dev/null +++ b/lib/Target/X86/README-X86-64.txt @@ -0,0 +1,269 @@ +//===- README_X86_64.txt - Notes for X86-64 code gen ----------------------===// + +Implement different PIC models? Right now we only support Mac OS X with small +PIC code model. + +//===---------------------------------------------------------------------===// + +Make use of "Red Zone". + +//===---------------------------------------------------------------------===// + +Implement __int128 and long double support. + +//===---------------------------------------------------------------------===// + +For this: + +extern void xx(void); +void bar(void) { + xx(); +} + +gcc compiles to: + +.globl _bar +_bar: + jmp _xx + +We need to do the tailcall optimization as well. + +//===---------------------------------------------------------------------===// + +For this: + +int test(int a) +{ + return a * 3; +} + +We generate + leal (%edi,%edi,2), %eax + +We should be generating + leal (%rdi,%rdi,2), %eax + +instead. The latter form does not require an address-size prefix 67H. + +It's probably ok to simply emit the corresponding 64-bit super class registers +in this case? + + +//===---------------------------------------------------------------------===// + +AMD64 Optimization Manual 8.2 has some nice information about optimizing integer +multiplication by a constant. How much of it applies to Intel's X86-64 +implementation? There are definite trade-offs to consider: latency vs. register +pressure vs. code size. + +//===---------------------------------------------------------------------===// + +Are we better off using branches instead of cmove to implement FP to +unsigned i64? 
+ +_conv: + ucomiss LC0(%rip), %xmm0 + cvttss2siq %xmm0, %rdx + jb L3 + subss LC0(%rip), %xmm0 + movabsq $-9223372036854775808, %rax + cvttss2siq %xmm0, %rdx + xorq %rax, %rdx +L3: + movq %rdx, %rax + ret + +instead of + +_conv: + movss LCPI1_0(%rip), %xmm1 + cvttss2siq %xmm0, %rcx + movaps %xmm0, %xmm2 + subss %xmm1, %xmm2 + cvttss2siq %xmm2, %rax + movabsq $-9223372036854775808, %rdx + xorq %rdx, %rax + ucomiss %xmm1, %xmm0 + cmovb %rcx, %rax + ret + +Seems like the jb branch has high likelihood of being taken. It would have +saved a few instructions. + +//===---------------------------------------------------------------------===// + +Poor codegen: + +int X[2]; +int b; +void test(void) { + memset(X, b, 2*sizeof(X[0])); +} + +llc: + movq _b@GOTPCREL(%rip), %rax + movzbq (%rax), %rax + movq %rax, %rcx + shlq $8, %rcx + orq %rax, %rcx + movq %rcx, %rax + shlq $16, %rax + orq %rcx, %rax + movq %rax, %rcx + shlq $32, %rcx + movq _X@GOTPCREL(%rip), %rdx + orq %rax, %rcx + movq %rcx, (%rdx) + ret + +gcc: + movq _b@GOTPCREL(%rip), %rax + movabsq $72340172838076673, %rdx + movzbq (%rax), %rax + imulq %rdx, %rax + movq _X@GOTPCREL(%rip), %rdx + movq %rax, (%rdx) + ret + +//===---------------------------------------------------------------------===// + +Vararg function prologue can be further optimized. Currently all XMM registers +are stored into register save area. Most of them can be eliminated since the +upper bound of the number of XMM registers used is passed in %al. gcc produces +something like the following: + + movzbl %al, %edx + leaq 0(,%rdx,4), %rax + leaq 4+L2(%rip), %rdx + leaq 239(%rsp), %rax + jmp *%rdx + movaps %xmm7, -15(%rax) + movaps %xmm6, -31(%rax) + movaps %xmm5, -47(%rax) + movaps %xmm4, -63(%rax) + movaps %xmm3, -79(%rax) + movaps %xmm2, -95(%rax) + movaps %xmm1, -111(%rax) + movaps %xmm0, -127(%rax) +L2: + +It jumps over the movaps that do not need to be stored. 
Hard to see this being +significant as it added 5 instructions (including an indirect branch) to avoid +executing 0 to 8 stores in the function prologue. + +Perhaps we can optimize for the common case where no XMM registers are used for +parameter passing. i.e. if %al == 0 jump over all stores. Or in the case of a +leaf function where we can determine that no XMM input parameter is needed, avoid +emitting the stores at all. + +//===---------------------------------------------------------------------===// + +AMD64 has a complex calling convention for aggregate passing by value: + +1. If the size of an object is larger than two eightbytes, or in C++, is a non- + POD structure or union type, or contains unaligned fields, it has class + MEMORY. +2. Both eightbytes get initialized to class NO_CLASS. +3. Each field of an object is classified recursively so that always two fields + are considered. The resulting class is calculated according to the classes + of the fields in the eightbyte: + (a) If both classes are equal, this is the resulting class. + (b) If one of the classes is NO_CLASS, the resulting class is the other + class. + (c) If one of the classes is MEMORY, the result is the MEMORY class. + (d) If one of the classes is INTEGER, the result is the INTEGER. + (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, MEMORY is used as + class. + (f) Otherwise class SSE is used. +4. Then a post merger cleanup is done: + (a) If one of the classes is MEMORY, the whole argument is passed in memory. + (b) If SSEUP is not preceded by SSE, it is converted to SSE. + +Currently llvm frontend does not handle this correctly. + +Problem 1: + typedef struct { int i; double d; } QuadWordS; +It is currently passed in two i64 integer registers. However, gcc compiled +callee expects the second element 'd' to be passed in XMM0. 
+ +Problem 2: + typedef struct { int32_t i; float j; double d; } QuadWordS; +The size of the first two fields == i64 so they will be combined and passed in +an integer register RDI. The third field is still passed in XMM0. + +Problem 3: + typedef struct { int64_t i; int8_t j; int64_t d; } S; + void test(S s) +The size of this aggregate is greater than two i64 so it should be passed in +memory. Currently llvm breaks this down and passes it in three integer +registers. + +Problem 4: +Taking problem 3 one step ahead where a function expects an aggregate value +in memory followed by more parameter(s) passed in register(s). + void test(S s, int b) + +LLVM IR does not allow parameter passing by aggregates, therefore it must break +the aggregate value (in problem 3 and 4) into a number of scalar values: + void %test(long %s.i, byte %s.j, long %s.d); + +However, if the backend were to lower this code literally it would pass the 3 +values in integer registers. To force it to be passed in memory, the frontend +should change the function signature to: + void %test(long %undef1, long %undef2, long %undef3, long %undef4, + long %undef5, long %undef6, + long %s.i, byte %s.j, long %s.d); +And the callee would look something like this: + call void %test( undef, undef, undef, undef, undef, undef, + %tmp.s.i, %tmp.s.j, %tmp.s.d ); +The first 6 undef parameters would exhaust the 6 integer registers used for +parameter passing. The following three integer values would then be forced into +memory. 
+ +For problem 4, the parameter 'd' would be moved to the front of the parameter +list so it will be passed in register: + void %test(int %d, + long %undef1, long %undef2, long %undef3, long %undef4, + long %undef5, long %undef6, + long %s.i, byte %s.j, long %s.d); + +//===---------------------------------------------------------------------===// + +For this: + +extern int dst[]; +extern int* ptr; + +void test(void) { + ptr = dst; +} + +We generate this code for static relocation model: + +_test: + leaq _dst(%rip), %rax + movq %rax, _ptr(%rip) + ret + +If we are in small code model, they we can treat _dst as a 32-bit constant. + movq $_dst, _ptr(%rip) + +Note, however, we should continue to use RIP relative addressing mode as much as +possible. The above is actually one byte shorter than + movq $_dst, _ptr + +//===---------------------------------------------------------------------===// + +Right now the asm printer assumes GlobalAddress are accessed via RIP relative +addressing. Therefore, it is not possible to generate this: + movabsq $__ZTV10polynomialIdE+16, %rax + +That is ok for now since we currently only support small model. So the above +is selected as + leaq __ZTV10polynomialIdE+16(%rip), %rax + +This is probably slightly slower but is much shorter than movabsq. However, if +we were to support medium or larger code models, we need to use the movabs +instruction. We should probably introduce something like AbsoluteAddress to +distinguish it from GlobalAddress so the asm printer and JIT code emitter can +do the right thing. diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index e15512db23..c4b3d8635f 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -20,8 +20,8 @@ include "../Target.td" // X86 Subtarget features. 
// -def Feature64Bit : SubtargetFeature<"64bit", "Is64Bit", "true", - "Enable 64-bit instructions">; +def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true", + "Support 64-bit instructions">; def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX", "Enable MMX instructions">; def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1", @@ -61,6 +61,8 @@ def : Proc<"prescott", [FeatureMMX, FeatureSSE1, FeatureSSE2, FeatureSSE3]>; def : Proc<"nocona", [FeatureMMX, FeatureSSE1, FeatureSSE2, FeatureSSE3, Feature64Bit]>; +def : Proc<"core2", [FeatureMMX, FeatureSSE1, FeatureSSE2, + FeatureSSE3, Feature64Bit]>; def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>; @@ -105,16 +107,20 @@ def X86InstrInfo : InstrInfo { // should be kept up-to-date with the fields in the X86InstrInfo.h file. let TSFlagsFields = ["FormBits", "hasOpSizePrefix", + "hasAdSizePrefix", "Prefix", + "hasREX_WPrefix", "ImmTypeBits", "FPFormBits", "Opcode"]; let TSFlagsShifts = [0, 6, 7, - 11, + 8, + 12, 13, - 16]; + 16, + 24]; } // The X86 target supports two different syntaxes for emitting machine code. diff --git a/lib/Target/X86/X86ATTAsmPrinter.cpp b/lib/Target/X86/X86ATTAsmPrinter.cpp index e3653e4920..b17cde18de 100755 --- a/lib/Target/X86/X86ATTAsmPrinter.cpp +++ b/lib/Target/X86/X86ATTAsmPrinter.cpp @@ -126,8 +126,9 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, O << '%'; unsigned Reg = MO.getReg(); if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) { - MVT::ValueType VT = (strcmp(Modifier,"subreg16") == 0) - ? MVT::i16 : MVT::i8; + MVT::ValueType VT = (strcmp(Modifier+6,"64") == 0) ? + MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 : + ((strcmp(Modifier+6,"16") == 0) ? 
MVT::i16 : MVT::i8)); Reg = getX86SubSuperRegister(Reg, VT); } for (const char *Name = RI.get(Reg).Name; *Name; ++Name) @@ -148,9 +149,11 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, if (!isMemOp) O << '$'; O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << "_" << MO.getJumpTableIndex(); - if (Subtarget->isTargetDarwin() && + if (X86PICStyle == PICStyle::Stub && TM.getRelocationModel() == Reloc::PIC_) O << "-\"L" << getFunctionNumber() << "$pb\""; + if (Subtarget->is64Bit()) + O << "(%rip)"; return; } case MachineOperand::MO_ConstantPoolIndex: { @@ -158,7 +161,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, if (!isMemOp) O << '$'; O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" << MO.getConstantPoolIndex(); - if (Subtarget->isTargetDarwin() && + if (X86PICStyle == PICStyle::Stub && TM.getRelocationModel() == Reloc::PIC_) O << "-\"L" << getFunctionNumber() << "$pb\""; int Offset = MO.getOffset(); @@ -166,47 +169,59 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, O << "+" << Offset; else if (Offset < 0) O << Offset; + + if (Subtarget->is64Bit()) + O << "(%rip)"; return; } case MachineOperand::MO_GlobalAddress: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); bool isMemOp = Modifier && !strcmp(Modifier, "mem"); if (!isMemOp && !isCallOp) O << '$'; - // Darwin block shameless ripped from PPCAsmPrinter.cpp - if (Subtarget->isTargetDarwin() && + + GlobalValue *GV = MO.getGlobal(); + std::string Name = Mang->getValueName(GV); + bool isExt = (GV->isExternal() || GV->hasWeakLinkage() || + GV->hasLinkOnceLinkage()); + if (X86PICStyle == PICStyle::Stub && TM.getRelocationModel() != Reloc::Static) { - GlobalValue *GV = MO.getGlobal(); - std::string Name = Mang->getValueName(GV); // Link-once, External, or Weakly-linked global variables need // non-lazily-resolved stubs - if (GV->isExternal() || GV->hasWeakLinkage() || - 
GV->hasLinkOnceLinkage()) { + if (isExt) { // Dynamically-resolved functions need a stub for the function. - if (isCallOp && isa<Function>(GV) && cast<Function>(GV)->isExternal()) { + if (isCallOp && isa<Function>(GV)) { FnStubs.insert(Name); O << "L" << Name << "$stub"; } else { GVStubs.insert(Name); O << "L" << Name << "$non_lazy_ptr"; } - } else { - O << Mang->getValueName(GV); - } + } else + O << Name; if (!isCallOp && TM.getRelocationModel() == Reloc::PIC_) O << "-\"L" << getFunctionNumber() << "$pb\""; - } else - O << Mang->getValueName(MO.getGlobal()); + } else + O << Name; + int Offset = MO.getOffset(); if (Offset > 0) O << "+" << Offset; else if (Offset < 0) O << Offset; + + if (!isCallOp && + Subtarget->is64Bit()) { + if (isExt && TM.getRelocationModel() != Reloc::Static) + O << "@GOTPCREL"; + O << "(%rip)"; + } + return; } case MachineOperand::MO_ExternalSymbol: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); if (isCallOp && - Subtarget->isTargetDarwin() && + X86PICStyle == PICStyle::Stub && TM.getRelocationModel() != Reloc::Static) { std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName(); @@ -216,6 +231,11 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, } if (!isCallOp) O << '$'; O << TAI->getGlobalPrefix() << MO.getSymbolName(); + + if (!isCallOp && + Subtarget->is64Bit()) + O << "(%rip)"; + return; } default: @@ -238,7 +258,8 @@ void X86ATTAsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op) { } } -void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){ +void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op, + const char *Modifier){ assert(isMem(MI, Op) && "Invalid memory reference!"); const MachineOperand &BaseReg = MI->getOperand(Op); @@ -266,12 +287,13 @@ void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){ if (IndexReg.getReg() || BaseReg.getReg()) { O << "("; - if (BaseReg.getReg()) - printOperand(MI, Op); + if 
(BaseReg.getReg()) { + printOperand(MI, Op, Modifier); + } if (IndexReg.getReg()) { O << ","; - printOperand(MI, Op+2); + printOperand(MI, Op+2, Modifier); if (ScaleVal != 1) O << "," << ScaleVal; } @@ -350,43 +372,25 @@ bool X86ATTAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, /// void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) { ++EmittedInsts; - // This works around some Darwin assembler bugs. - if (Subtarget->isTargetDarwin()) { - switch (MI->getOpcode()) { - case X86::REP_MOVSB: - O << "rep/movsb (%esi),(%edi)\n"; - return; - case X86::REP_MOVSD: - O << "rep/movsl (%esi),(%edi)\n"; - return; - case X86::REP_MOVSW: - O << "rep/movsw (%esi),(%edi)\n"; - return; - case X86::REP_STOSB: - O << "rep/stosb\n"; - return; - case X86::REP_STOSD: - O << "rep/stosl\n"; - return; - case X86::REP_STOSW: - O << "rep/stosw\n"; - return; - default: - break; - } - } // See if a truncate instruction can be turned into a nop. switch (MI->getOpcode()) { default: break; - case X86::TRUNC_GR32_GR16: - case X86::TRUNC_GR32_GR8: - case X86::TRUNC_GR16_GR8: { + case X86::TRUNC_64to32: + case X86::TRUNC_64to16: + case X86::TRUNC_32to16: + case X86::TRUNC_32to8: + case X86::TRUNC_16to8: + case X86::TRUNC_32_to8: + case X86::TRUNC_16_to8: { const MachineOperand &MO0 = MI->getOperand(0); const MachineOperand &MO1 = MI->getOperand(1); unsigned Reg0 = MO0.getReg(); unsigned Reg1 = MO1.getReg(); - if (MI->getOpcode() == X86::TRUNC_GR32_GR16) + unsigned Opc = MI->getOpcode(); + if (Opc == X86::TRUNC_64to32) + Reg1 = getX86SubSuperRegister(Reg1, MVT::i32); + else if (Opc == X86::TRUNC_32to16 || Opc == X86::TRUNC_64to16) Reg1 = getX86SubSuperRegister(Reg1, MVT::i16); else Reg1 = getX86SubSuperRegister(Reg1, MVT::i8); @@ -395,6 +399,9 @@ void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) { O << "\n\t"; break; } + case X86::PsMOVZX64rr32: + O << TAI->getCommentString() << " ZERO-EXTEND " << "\n\t"; + break; } // Call the autogenerated 
instruction printer routines. diff --git a/lib/Target/X86/X86ATTAsmPrinter.h b/lib/Target/X86/X86ATTAsmPrinter.h index ff707caee6..167e812f4d 100755 --- a/lib/Target/X86/X86ATTAsmPrinter.h +++ b/lib/Target/X86/X86ATTAsmPrinter.h @@ -60,6 +60,9 @@ struct X86ATTAsmPrinter : public X86SharedAsmPrinter { void printf128mem(const MachineInstr *MI, unsigned OpNo) { printMemReference(MI, OpNo); } + void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) { + printMemReference(MI, OpNo, "subreg64"); + } bool printAsmMRegister(const MachineOperand &MO, const char Mode); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, @@ -69,7 +72,8 @@ struct X86ATTAsmPrinter : public X86SharedAsmPrinter { void printMachineInstruction(const MachineInstr *MI); void printSSECC(const MachineInstr *MI, unsigned Op); - void printMemReference(const MachineInstr *MI, unsigned Op); + void printMemReference(const MachineInstr *MI, unsigned Op, + const char *Modifier=NULL); void printPICLabel(const MachineInstr *MI, unsigned Op); bool runOnMachineFunction(MachineFunction &F); }; diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 4a54e5914d..b634d13ea4 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -30,8 +30,12 @@ Statistic<> llvm::EmittedInsts("asm-printer", "Number of machine instrs printed"); /// doInitialization -bool X86SharedAsmPrinter::doInitialization(Module &M) { +bool X86SharedAsmPrinter::doInitialization(Module &M) { if (Subtarget->isTargetDarwin()) { + const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); + if (!Subtarget->is64Bit()) + X86PICStyle = PICStyle::Stub; + // Emit initial debug information. 
DW.BeginModule(&M); } diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index 8d32f59d8b..6db9e45dc3 100755 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -29,12 +29,19 @@ namespace llvm { extern Statistic<> EmittedInsts; +// FIXME: Move this to CodeGen/AsmPrinter.h +namespace PICStyle { + enum X86AsmPICStyle { + Stub, GOT + }; +} + struct VISIBILITY_HIDDEN X86SharedAsmPrinter : public AsmPrinter { DwarfWriter DW; X86SharedAsmPrinter(std::ostream &O, X86TargetMachine &TM, const TargetAsmInfo *T) - : AsmPrinter(O, TM, T), DW(O, this, T) { + : AsmPrinter(O, TM, T), DW(O, this, T), X86PICStyle(PICStyle::GOT) { Subtarget = &TM.getSubtarget<X86Subtarget>(); } @@ -49,6 +56,8 @@ struct VISIBILITY_HIDDEN X86SharedAsmPrinter : public AsmPrinter { MachineFunctionPass::getAnalysisUsage(AU); } + PICStyle::X86AsmPICStyle X86PICStyle; + const X86Subtarget *Subtarget; // Necessary for Darwin to print out the apprioriate types of linker stubs diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index f7d53caed4..0ac8bc5f32 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -12,6 +12,8 @@ // //===----------------------------------------------------------------------===// +#include "X86InstrInfo.h" +#include "X86Subtarget.h" #include "X86TargetMachine.h" #include "X86Relocations.h" #include "X86.h" @@ -35,14 +37,16 @@ namespace { namespace { class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass { const X86InstrInfo *II; - TargetMachine &TM; + const TargetData *TD; + TargetMachine &TM; MachineCodeEmitter &MCE; + bool Is64BitMode; public: explicit Emitter(TargetMachine &tm, MachineCodeEmitter &mce) - : II(0), TM(tm), MCE(mce) {} + : II(0), TD(0), TM(tm), MCE(mce), Is64BitMode(false) {} Emitter(TargetMachine &tm, MachineCodeEmitter &mce, - const X86InstrInfo& ii) - : II(&ii), TM(tm), MCE(mce) {} + const X86InstrInfo &ii, const TargetData &td, bool is64) + 
: II(&ii), TD(&td), TM(tm), MCE(mce), Is64BitMode(is64) {} bool runOnMachineFunction(MachineFunction &MF); @@ -54,20 +58,29 @@ namespace { private: void emitPCRelativeBlockAddress(MachineBasicBlock *MBB); - void emitPCRelativeValue(unsigned Address); - void emitGlobalAddressForCall(GlobalValue *GV, bool isTailCall); - void emitGlobalAddressForPtr(GlobalValue *GV, int Disp = 0); + void emitPCRelativeValue(intptr_t Address); + void emitGlobalAddressForCall(GlobalValue *GV, bool DoesntNeedStub); + void emitGlobalAddressForPtr(GlobalValue *GV, bool isPCRelative, + int Disp = 0, unsigned PCAdj = 0); void emitExternalSymbolAddress(const char *ES, bool isPCRelative); + void emitPCRelativeConstPoolAddress(unsigned CPI, int Disp = 0, + unsigned PCAdj = 0); + void emitPCRelativeJumpTableAddress(unsigned JTI, unsigned PCAdj = 0); - void emitDisplacementField(const MachineOperand *RelocOp, int DispVal); + void emitDisplacementField(const MachineOperand *RelocOp, int DispVal, + unsigned PCAdj = 0); void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField); void emitSIBByte(unsigned SS, unsigned Index, unsigned Base); - void emitConstant(unsigned Val, unsigned Size); + void emitConstant(uint64_t Val, unsigned Size); void emitMemModRMByte(const MachineInstr &MI, - unsigned Op, unsigned RegOpcodeField); + unsigned Op, unsigned RegOpcodeField, + unsigned PCAdj = 0); + unsigned getX86RegNum(unsigned RegNo); + bool isX86_64ExtendedReg(const MachineOperand &MO); + unsigned determineREX(const MachineInstr &MI); }; } @@ -83,6 +96,9 @@ bool Emitter::runOnMachineFunction(MachineFunction &MF) { MF.getTarget().getRelocationModel() != Reloc::Static) && "JIT relocation model must be set to static or default!"); II = ((X86TargetMachine&)MF.getTarget()).getInstrInfo(); + TD = ((X86TargetMachine&)MF.getTarget()).getTargetData(); + Is64BitMode = + ((X86TargetMachine&)MF.getTarget()).getSubtarget<X86Subtarget>().is64Bit(); do { MCE.startFunction(MF); @@ -98,9 +114,9 @@ bool 
Emitter::runOnMachineFunction(MachineFunction &MF) { return false; } -/// emitPCRelativeValue - Emit a 32-bit PC relative address. +/// emitPCRelativeValue - Emit a PC relative address. /// -void Emitter::emitPCRelativeValue(unsigned Address) { +void Emitter::emitPCRelativeValue(intptr_t Address) { MCE.emitWordLE(Address-MCE.getCurrentPCValue()-4); } @@ -119,20 +135,22 @@ void Emitter::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) { /// emitGlobalAddressForCall - Emit the specified address to the code stream /// assuming this is part of a function call, which is PC relative. /// -void Emitter::emitGlobalAddressForCall(GlobalValue *GV, bool isTailCall) { +void Emitter::emitGlobalAddressForCall(GlobalValue *GV, bool DoesntNeedStub) { MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), X86::reloc_pcrel_word, GV, 0, - !isTailCall /*Doesn'tNeedStub*/)); + DoesntNeedStub)); MCE.emitWordLE(0); } /// emitGlobalAddress - Emit the specified address to the code stream assuming -/// this is part of a "take the address of a global" instruction, which is not -/// PC relative. +/// this is part of a "take the address of a global" instruction. /// -void Emitter::emitGlobalAddressForPtr(GlobalValue *GV, int Disp /* = 0 */) { - MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), - X86::reloc_absolute_word, GV)); +void Emitter::emitGlobalAddressForPtr(GlobalValue *GV, bool isPCRelative, + int Disp /* = 0 */, + unsigned PCAdj /* = 0 */) { + unsigned rt = isPCRelative ? 
X86::reloc_pcrel_word : X86::reloc_absolute_word; + MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), rt, + GV, PCAdj)); MCE.emitWordLE(Disp); // The relocated value will be added to the displacement } @@ -145,6 +163,26 @@ void Emitter::emitExternalSymbolAddress(const char *ES, bool isPCRelative) { MCE.emitWordLE(0); } +/// emitPCRelativeConstPoolAddress - Arrange for the address of an constant pool +/// to be emitted to the current location in the function, and allow it to be PC +/// relative. +void Emitter::emitPCRelativeConstPoolAddress(unsigned CPI, int Disp /* = 0 */, + unsigned PCAdj /* = 0 */) { + MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), + X86::reloc_pcrel_word, CPI, PCAdj)); + MCE.emitWordLE(Disp); // The relocated value will be added to the displacement +} + +/// emitPCRelativeJumpTableAddress - Arrange for the address of a jump table to +/// be emitted to the current location in the function, and allow it to be PC +/// relative. +void Emitter::emitPCRelativeJumpTableAddress(unsigned JTI, + unsigned PCAdj /* = 0 */) { + MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), + X86::reloc_pcrel_word, JTI, PCAdj)); + MCE.emitWordLE(0); // The relocated value will be added to the displacement +} + /// N86 namespace - Native X86 Register numbers... used by X86 backend. /// namespace N86 { @@ -153,28 +191,53 @@ namespace N86 { }; } - // getX86RegNum - This function maps LLVM register identifiers to their X86 // specific numbering, which is used in various places encoding instructions. 
// -static unsigned getX86RegNum(unsigned RegNo) { +unsigned Emitter::getX86RegNum(unsigned RegNo) { switch(RegNo) { - case X86::EAX: case X86::AX: case X86::AL: return N86::EAX; - case X86::ECX: case X86::CX: case X86::CL: return N86::ECX; - case X86::EDX: case X86::DX: case X86::DL: return N86::EDX; - case X86::EBX: case X86::BX: case X86::BL: return N86::EBX; - case X86::ESP: case X86::SP: case X86::AH: return N86::ESP; - case X86::EBP: case X86::BP: case X86::CH: return N86::EBP; - case X86::ESI: case X86::SI: case X86::DH: return N86::ESI; - case X86::EDI: case X86::DI: case X86::BH: return N86::EDI; + case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX; + case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX; + case X86::RDX: case X86::EDX: case X86::DX: cas |