Diffstat (limited to 'lib/Target/X86/X86NaClJITInfo.cpp')
-rw-r--r--  lib/Target/X86/X86NaClJITInfo.cpp  393
 1 file changed, 393 insertions(+), 0 deletions(-)
diff --git a/lib/Target/X86/X86NaClJITInfo.cpp b/lib/Target/X86/X86NaClJITInfo.cpp
new file mode 100644
index 0000000000..e5ccbf960d
--- /dev/null
+++ b/lib/Target/X86/X86NaClJITInfo.cpp
@@ -0,0 +1,393 @@
+//===-- X86NaClJITInfo.cpp - Implement the JIT interfaces for X86 NaCl ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the X86 target on Native Client.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "X86NaClJITInfo.h"
+#include "X86Relocations.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Disassembler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Valgrind.h"
+#include <cstdlib>
+#ifdef __native_client__
+#include <nacl/nacl_dyncode.h>
+#endif
+
+using namespace llvm;
+
+extern cl::opt<int> FlagSfiX86JmpMask;
+
+// Determine the platform we're running on
+#if defined (__x86_64__) || defined (_M_AMD64) || defined (_M_X64)
+# define X86_64_JIT
+#elif defined(__i386__) || defined(i386) || defined(_M_IX86)
+# define X86_32_JIT
+#elif defined(__pnacl__)
+#warning "PNaCl does not yet have JIT support"
+#else
+#error "Should not be building X86NaClJITInfo on non-x86"
+// TODO(dschuff): make this work under pnacl self-build?
+#endif
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+# define SIZE(sym) ".size " #sym ", . - " #sym "\n"
+# define TYPE_FUNCTION(sym) ".type " #sym ", @function\n"
+
+void X86NaClJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ // We don't know the original instruction boundaries, so we replace the
+ // whole bundle.
+ uint8_t buf[kBundleSize];
+ buf[0] = 0xE9; // Emit JMP opcode.
+  intptr_t OldAddr = (intptr_t)Old + 1;
+  uint32_t NewOffset = (intptr_t)New - OldAddr - 4; // PC-relative offset to New
+ *((uint32_t*)(buf + 1)) = NewOffset;
+ memcpy(buf + 5, getNopSequence(kBundleSize - 5), kBundleSize - 5);
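+  // The resulting bundle, byte for byte (a sketch; assumes kBundleSize == 32,
+  // consistent with the 32-byte bundles used elsewhere in this file):
+  //    0: E9 xx xx xx xx    jmp New (rel32)
+  //    5: 90 * 27           one-byte NOPs out to the bundle boundary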
+
+#ifdef __native_client__
+  if (nacl_dyncode_create(Old, buf, kBundleSize)) {
+ report_fatal_error("machine code replacement failed");
+ }
+#endif
+
+ // X86 doesn't need to invalidate the processor cache, so just invalidate
+ // Valgrind's cache directly.
+  sys::ValgrindDiscardTranslations(Old, kBundleSize);
+}
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+extern "C" {
+#if defined(X86_64_JIT) || defined(__pnacl__) || !defined(__native_client__)
+void X86NaClCompilationCallback(void) {
+  // TODO(dschuff): implement for X86-64
+}
+void X86NaClCompilationCallback_fastcc(void) {
+  // TODO(dschuff): implement for X86-64
+}
+#else
+// Chrome system requirements include a PIII, so SSE is present.
+// For now this is the same as X86CompilationCallback_SSE.
+// In the future we could emit this code rather than defining it with asm, for
+// compatibility with the pnacl self-build.
+// We also omit the CFI directives (which are #defined away).
+
+// The difference between the two wrapper variants is that the first returns
+// through ecx and the second returns through eax. The fastcc calling
+// convention uses ecx to pass arguments, and the C calling convention uses
+// eax to pass arguments with the 'inreg' attribute, so each variant avoids
+// clobbering the corresponding register. Returning through eax for fastcc and
+// through ecx for C clobbers the 'nest' parameter, breaking nested functions
+// (which are not supported by clang in any case).
+
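+// Illustrative summary (not generated code; register assignments per the
+// comment above):
+//   fastcc args in ecx      -> fastcc wrapper preserves ecx, exits via
+//                              "nacljmp %eax"
+//   C 'inreg' args in eax   -> C wrapper preserves eax, exits via
+//                              "nacljmp %ecx"
+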
+void X86NaClCompilationCallback(void);
+asm(
+ ".text\n"
+ ".align 32\n"
+ ".globl " ASMPREFIX "X86NaClCompilationCallback\n"
+ TYPE_FUNCTION(X86NaClCompilationCallback)
+ ASMPREFIX "X86NaClCompilationCallback:\n"
+ "pushl %ebp\n"
+ "movl %esp, %ebp\n" // Standard prologue
+ "pushl %eax\n"
+ "pushl %edx\n" // Save EAX/EDX/ECX
+ "pushl %ecx\n"
+ "andl $-16, %esp\n" // Align ESP on 16-byte boundary
+ // Save all XMM arg registers
+ "subl $64, %esp\n"
+ // FIXME: provide frame move information for xmm registers.
+ // This can be tricky, because CFA register is ebp (unaligned)
+ // and we need to produce offsets relative to it.
+ "movaps %xmm0, (%esp)\n"
+ "movaps %xmm1, 16(%esp)\n"
+ "movaps %xmm2, 32(%esp)\n"
+ "movaps %xmm3, 48(%esp)\n"
+ "subl $16, %esp\n"
+ "movl 4(%ebp), %eax\n" // Pass prev frame and return address
+ "movl %eax, 4(%esp)\n"
+ "movl %ebp, (%esp)\n"
+ "call " ASMPREFIX "X86NaClCompilationCallback2\n"
+ "addl $16, %esp\n"
+ "movaps 48(%esp), %xmm3\n"
+ "movaps 32(%esp), %xmm2\n"
+ "movaps 16(%esp), %xmm1\n"
+ "movaps (%esp), %xmm0\n"
+ "movl %ebp, %esp\n" // Restore ESP
+ "subl $12, %esp\n"
+ "popl %ecx\n"
+ "popl %edx\n"
+ "popl %eax\n"
+ "popl %ebp\n"
+ "popl %ecx\n"
+ "nacljmp %ecx\n"
+ SIZE(X86NaClCompilationCallback)
+);
+
+void X86NaClCompilationCallback_fastcc(void);
+asm(
+ ".text\n"
+ ".align 32\n"
+ ".globl " ASMPREFIX "X86NaClCompilationCallback_fastcc\n"
+ TYPE_FUNCTION(X86NaClCompilationCallback_fastcc)
+ ASMPREFIX "X86NaClCompilationCallback_fastcc:\n"
+ "pushl %ebp\n"
+ "movl %esp, %ebp\n" // Standard prologue
+ "pushl %eax\n"
+ "pushl %edx\n" // Save EAX/EDX/ECX
+ "pushl %ecx\n"
+ "andl $-16, %esp\n" // Align ESP on 16-byte boundary
+ // Save all XMM arg registers
+ "subl $64, %esp\n"
+ // FIXME: provide frame move information for xmm registers.
+ // This can be tricky, because CFA register is ebp (unaligned)
+ // and we need to produce offsets relative to it.
+ "movaps %xmm0, (%esp)\n"
+ "movaps %xmm1, 16(%esp)\n"
+ "movaps %xmm2, 32(%esp)\n"
+ "movaps %xmm3, 48(%esp)\n"
+ "subl $16, %esp\n"
+ "movl 4(%ebp), %eax\n" // Pass prev frame and return address
+ "movl %eax, 4(%esp)\n"
+ "movl %ebp, (%esp)\n"
+ "call " ASMPREFIX "X86NaClCompilationCallback2\n"
+ "addl $16, %esp\n"
+ "movaps 48(%esp), %xmm3\n"
+ "movaps 32(%esp), %xmm2\n"
+ "movaps 16(%esp), %xmm1\n"
+ "movaps (%esp), %xmm0\n"
+ "movl %ebp, %esp\n" // Restore ESP
+ "subl $12, %esp\n"
+ "popl %ecx\n"
+ "popl %edx\n"
+ "popl %eax\n"
+ "popl %ebp\n"
+ "popl %eax\n"
+ "nacljmp %eax\n"
+ SIZE(X86NaClCompilationCallback_fastcc)
+);
+#endif
+
+/// X86NaClCompilationCallback2 - This is the target-specific function invoked
+/// by the function stub when we did not know the real target of a call. This
+/// function must locate the start of the stub or call site and pass it into
+/// the JIT compiler function.
+
+// A stub has the following format:
+// | Jump opcode (1 byte) | Jump target, +22 (4 bytes) | 3 bytes of NOPs |
+// | 18 bytes of NOPs | 1 halt | Call opcode (1 byte) | call target (4 bytes) |
+// The jump targets the call at the end of the bundle, which targets the
+// compilation callback. Once the compilation callback has JITed the target
+// function, it replaces the first 8 bytes of the stub in a single atomic
+// operation, retargeting the jump at the newly JITed function.
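+//
+// Byte offsets within the stub bundle (a sketch matching emitFunctionStub
+// below; assumes kBundleSize == 32):
+//    0: E9 16 00 00 00    jmp +22 (to the call at offset 27); bytes 0-7 are
+//                         the 8-byte prefix rewritten atomically later
+//    5: 90 * 3            NOPs (instruction boundary at byte 8)
+//    8: 90 * 18           NOPs
+//   26: F4                hlt (lets the callback recognize a stub)
+//   27: E8 xx xx xx xx    call X86NaClCompilationCallback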
+
+static uint8_t *BundleRewriteBuffer;
+
+static void LLVM_ATTRIBUTE_USED
+X86NaClCompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
+ // Get the return address from where the call instruction left it
+ intptr_t *RetAddrLoc = &StackPtr[1];
+ assert(*RetAddrLoc == RetAddr &&
+ "Could not find return address on the stack!");
+
+  // TODO: Take a lock here. Figure out whether it has to be the JIT lock or
+  // can be our own lock (or however we handle thread safety).
+#if 0
+ DEBUG(dbgs() << "In callback! Addr=" << (void*)RetAddr
+ << " ESP=" << (void*)StackPtr << "\n");
+#endif
+
+ intptr_t StubStart = RetAddr - 32;
+  // This probably isn't necessary. I believe the corresponding code in
+  // X86JITInfo is vestigial, and AFAICT no non-stub calls to the compilation
+  // callback are generated anywhere. Still, it doesn't hurt as a sanity check.
+  bool isStub = *((unsigned char*)StubStart) == 0xE9 &&
+                *((int32_t*)(StubStart + 1)) == 22 &&
+                *((unsigned char*)(StubStart + 26)) == 0xF4;
+  (void)isStub; // Avoid an unused-variable warning when asserts are disabled.
+
+ assert(isStub && "NaCl doesn't support rewriting non-stub callsites yet");
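+  // (The check matches the "jmp +22" at stub offset 0 and the hlt at offset
+  // 26 in the stub layout described above.)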
+
+ // Backtrack so RetAddr points inside the stub (so JITResolver can find
+ // which function to compile)
+ RetAddr -= 4;
+
+ intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)RetAddr);
+
+  // Rewrite the stub's jump target, so that we don't end up here every time
+  // we execute the call.
+
+ // Get the first 8 bytes of the stub
+ memcpy(BundleRewriteBuffer, (void *)(StubStart), 8);
+ // Point the jump at the newly-JITed code
+ *((intptr_t *)(BundleRewriteBuffer + 1)) = NewVal - (StubStart + 5);
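+  // Bytes 0-4 of the stub are now "E9 <rel32 to NewVal>" and bytes 5-7 keep
+  // their original NOPs, so subsequent calls jump straight to the JITed
+  // function.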
+
+ // Copy the new code
+#ifdef __native_client__
+  if (nacl_dyncode_modify((void *)StubStart, BundleRewriteBuffer, 8)) {
+ report_fatal_error("dyncode_modify failed");
+ }
+#endif
+ // TODO: release the lock
+
+ // Change our return address to execute the new jump
+ *RetAddrLoc = StubStart;
+}
+
+}
+
+const int X86NaClJITInfo::kBundleSize;
+
+TargetJITInfo::LazyResolverFn
+X86NaClJITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ JITCompilerFunction = F;
+ return X86NaClCompilationCallback;
+}
+
+X86NaClJITInfo::X86NaClJITInfo(X86TargetMachine &tm) : X86JITInfo(tm) {
+ // FIXME: does LLVM have some way of doing static initialization?
+#ifndef __pnacl__
+  if (posix_memalign((void **)&BundleRewriteBuffer, kBundleSize, kBundleSize))
+ report_fatal_error("Could not allocate aligned memory");
+#else
+ BundleRewriteBuffer = NULL;
+#endif
+
+  NopString = new uint8_t[kBundleSize];
+  for (int i = 0; i < kBundleSize; i++) NopString[i] = 0x90; // one-byte NOP
+  X86Hlt.ins = new uint8_t[1];
+  X86Hlt.ins[0] = 0xf4; // hlt
+  X86Hlt.len = 1;
+}
+
+X86NaClJITInfo::~X86NaClJITInfo() {
+ delete [] NopString;
+ delete [] X86Hlt.ins;
+}
+
+TargetJITInfo::StubLayout X86NaClJITInfo::getStubLayout() {
+  // NaCl stubs must be full bundles, because calls must be aligned to end at
+  // a bundle boundary even if they never return.
+ StubLayout Result = {kBundleSize, kBundleSize};
+ return Result;
+}
+
+
+void *X86NaClJITInfo::emitFunctionStub(const Function* F, void *Target,
+ JITCodeEmitter &JCE) {
+ bool TargetsCC = Target == (void *)(intptr_t)X86NaClCompilationCallback;
+
+ // If we target the compilation callback, swap it for a different one for
+ // functions using the fastcc calling convention
+  if (TargetsCC && F->getCallingConv() == CallingConv::Fast) {
+ Target = (void *)(intptr_t)X86NaClCompilationCallback_fastcc;
+ }
+
+ void *Result = (void *)JCE.getCurrentPCValue();
+ assert(RoundUpToAlignment((uintptr_t)Result, kBundleSize) == (uintptr_t)Result
+ && "Unaligned function stub");
+ if (!TargetsCC) {
+ // Jump to the target
+ JCE.emitByte(0xE9);
+ JCE.emitWordLE((intptr_t)Target - JCE.getCurrentPCValue() - 4);
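+    // (The rel32 is relative to the end of the 5-byte jmp, i.e. Result + 5.)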
+    // Fill the rest of the bundle (kBundleSize - 5 = 27 bytes) with NOPs.
+    emitNopPadding(JCE, 27);
+ } else {
+ // Jump over 22 bytes
+ JCE.emitByte(0xE9);
+ JCE.emitWordLE(22);
+    // Emit 3 bytes of NOPs to ensure an instruction boundary at byte 8
+    emitNopPadding(JCE, 3);
+    // Emit 18 bytes of NOPs
+    emitNopPadding(JCE, 18);
+    // Emit 1 byte of halt. This helps X86NaClCompilationCallback2 tell
+    // whether we came from a stub or not
+    JCE.emitByte(X86Hlt.ins[0]);
+    // Emit a call to the compilation callback
+ JCE.emitByte(0xE8);
+ JCE.emitWordLE((intptr_t)Target - JCE.getCurrentPCValue() - 4);
+ }
+ return Result;
+}
+
+// Relocations are the same as on X86, but the address being written is not
+// the same as the address that the offset is relative to (see the comment on
+// setRelocationBuffer in X86NaClJITInfo.h).
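+//
+// For example (a sketch of the reloc_pcrel_word case below): for a
+// pc-relative reloc at offset Off, the bytes being patched live at
+// RelocationBuffer + Off but will execute at Function + Off, so the
+// displacement is computed against Function + Off + 4 while being written at
+// RelocationBuffer + Off.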
+void X86NaClJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ void *RelocPos = RelocationBuffer + MR->getMachineCodeOffset();
+ void *RelocTargetPos = (char*)Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
+ switch ((X86::RelocationType)MR->getRelocationType()) {
+ case X86::reloc_pcrel_word: {
+ // PC relative relocation, add the relocated value to the value already in
+ // memory, after we adjust it for where the PC is.
+      ResultPtr = ResultPtr - (intptr_t)RelocTargetPos - 4 -
+                  MR->getConstantVal();
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ }
+ case X86::reloc_picrel_word: {
+ // PIC base relative relocation, add the relocated value to the value
+ // already in memory, after we adjust it for where the PIC base is.
+ ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal());
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ }
+ case X86::reloc_absolute_word:
+ case X86::reloc_absolute_word_sext:
+ // Absolute relocation, just add the relocated value to the value already
+ // in memory.
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ case X86::reloc_absolute_dword:
+ *((intptr_t*)RelocPos) += ResultPtr;
+ break;
+ }
+ }
+}
+
+const uint8_t *X86NaClJITInfo::getNopSequence(size_t len) const {
+  // TODO(dschuff): use more efficient NOPs.
+  // Update emitNopPadding when that happens.
+  assert((int)len <= kBundleSize &&
+         "Nop sequence can't be more than bundle size");
+ return NopString;
+}
+
+void X86NaClJITInfo::emitNopPadding(JITCodeEmitter &JCE, size_t len) {
+ for (size_t i = 0; i < len; i++) JCE.emitByte(NopString[i]);
+}
+
+const TargetJITInfo::HaltInstruction *X86NaClJITInfo::getHalt() const {
+ return &X86Hlt;
+}
+
+int X86NaClJITInfo::getBundleSize() const {
+ return kBundleSize;
+}
+
+int32_t X86NaClJITInfo::getJumpMask() const {
+ return FlagSfiX86JmpMask;
+}