Diffstat (limited to 'lib/Target/X86/X86NaClJITInfo.cpp')
-rw-r--r-- | lib/Target/X86/X86NaClJITInfo.cpp | 393 |
1 file changed, 393 insertions, 0 deletions
diff --git a/lib/Target/X86/X86NaClJITInfo.cpp b/lib/Target/X86/X86NaClJITInfo.cpp
new file mode 100644
index 0000000000..e5ccbf960d
--- /dev/null
+++ b/lib/Target/X86/X86NaClJITInfo.cpp
@@ -0,0 +1,393 @@
+//===-- X86NaClJITInfo.cpp - Implement JIT interfaces for X86/NaCl -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the X86 target on Native Client.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "X86NaClJITInfo.h"
+#include "X86Relocations.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include <cstdlib>
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Disassembler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Valgrind.h"
+#ifdef __native_client__
+#include <nacl/nacl_dyncode.h>
+#endif
+
+using namespace llvm;
+
+extern cl::opt<int> FlagSfiX86JmpMask;
+
+// Determine the platform we're running on.
+#if defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
+# define X86_64_JIT
+#elif defined(__i386__) || defined(i386) || defined(_M_IX86)
+# define X86_32_JIT
+#elif defined(__pnacl__)
+# warning "PNaCl does not yet have JIT support"
+// TODO(dschuff): make this work under the PNaCl self-build?
+#else
+# error "Should not be building X86NaClJITInfo on non-x86"
+#endif
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+#define SIZE(sym) ".size " #sym ", . - " #sym "\n"
+#define TYPE_FUNCTION(sym) ".type " #sym ", @function\n"
+
+void X86NaClJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+  // We don't know the original instruction boundaries, so we replace the
+  // whole bundle.
+  uint8_t buf[kBundleSize];
+  buf[0] = 0xE9;  // JMP rel32 opcode
+  uintptr_t OldAddr = (uintptr_t)Old + 1;  // Address of the displacement field
+  // PC-relative displacement to New, measured from the end of the JMP.
+  uint32_t NewOffset = (uintptr_t)New - OldAddr - 4;
+  *((uint32_t*)(buf + 1)) = NewOffset;
+  memcpy(buf + 5, getNopSequence(kBundleSize - 5), kBundleSize - 5);
+
+#ifdef __native_client__
+  if (nacl_dyncode_create(Old, buf, kBundleSize)) {
+    report_fatal_error("machine code replacement failed");
+  }
+#endif
+
+  // X86 doesn't need to invalidate the processor cache, so just invalidate
+  // Valgrind's cache directly.
+  sys::ValgrindDiscardTranslations(Old, 5);
+}
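The displacement math above (OldAddr = Old + 1, then NewOffset = New - OldAddr - 4) collapses to New - (Old + 5): an E9 jump's 32-bit displacement is relative to the end of the 5-byte instruction. A minimal standalone sketch of the same encoding, not part of the patch (the helper name EncodeJmpRel32 is hypothetical):

    #include <cstdint>
    #include <cstring>

    // Encode "jmp rel32" at address From, targeting address To. The CPU adds
    // the stored displacement to the address of the *next* instruction
    // (From + 5), so the stored value is To - (From + 5).
    static void EncodeJmpRel32(uint8_t *Buf, uintptr_t From, uintptr_t To) {
      Buf[0] = 0xE9;                                // JMP rel32 opcode
      uint32_t Disp = (uint32_t)(To - (From + 5));  // wraps mod 2^32, as x86 does
      memcpy(Buf + 1, &Disp, sizeof(Disp));         // x86 is little-endian
    }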
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+extern "C" {
+#if defined(X86_64_JIT) || defined(__pnacl__) || !defined(__native_client__)
+void X86NaClCompilationCallback(void) {
+  // TODO(dschuff): implement for X86-64.
+}
+void X86NaClCompilationCallback_fastcc(void) {
+  // TODO(dschuff): implement for X86-64.
+}
+#else
+// Chrome's system requirements include a Pentium III, so SSE is present.
+// For now this is the same as X86CompilationCallback_SSE.
+// In the future we could emit this rather than defining it with asm, for
+// compatibility with the PNaCl self-build. It also omits the CFI directives
+// (which are #defined away).
+//
+// The difference between the two wrapper variants is that the first returns
+// through %ecx and the second returns through %eax. The fastcc calling
+// convention uses %ecx to pass arguments, and the C calling convention uses
+// %eax to pass arguments with the 'inreg' attribute, so each variant avoids
+// clobbering the register its callers use for arguments. Returning through
+// %eax for fastcc and %ecx for C does clobber the 'nest' parameter, breaking
+// nested functions, but those are not supported by clang in any case.
+
+void X86NaClCompilationCallback(void);
+asm(
+  ".text\n"
+  ".align 32\n"
+  ".globl " ASMPREFIX "X86NaClCompilationCallback\n"
+  TYPE_FUNCTION(X86NaClCompilationCallback)
+  ASMPREFIX "X86NaClCompilationCallback:\n"
+  "pushl %ebp\n"
+  "movl  %esp, %ebp\n"      // Standard prologue
+  "pushl %eax\n"
+  "pushl %edx\n"            // Save EAX/EDX/ECX
+  "pushl %ecx\n"
+  "andl  $-16, %esp\n"      // Align ESP on a 16-byte boundary
+  "subl  $64, %esp\n"       // Make room for the XMM arg registers
+  // FIXME: provide frame move information for the xmm registers.
+  // This can be tricky, because the CFA register is ebp (unaligned)
+  // and we need to produce offsets relative to it.
+  "movaps %xmm0, (%esp)\n"
+  "movaps %xmm1, 16(%esp)\n"
+  "movaps %xmm2, 32(%esp)\n"
+  "movaps %xmm3, 48(%esp)\n"
+  "subl  $16, %esp\n"
+  "movl  4(%ebp), %eax\n"   // Pass the prev frame and return address
+  "movl  %eax, 4(%esp)\n"
+  "movl  %ebp, (%esp)\n"
+  "call  " ASMPREFIX "X86NaClCompilationCallback2\n"
+  "addl  $16, %esp\n"
+  "movaps 48(%esp), %xmm3\n"
+  "movaps 32(%esp), %xmm2\n"
+  "movaps 16(%esp), %xmm1\n"
+  "movaps (%esp), %xmm0\n"
+  "movl  %ebp, %esp\n"      // Restore ESP
+  "subl  $12, %esp\n"       // Step down to the saved EAX/EDX/ECX
+  "popl  %ecx\n"
+  "popl  %edx\n"
+  "popl  %eax\n"
+  "popl  %ebp\n"
+  "popl  %ecx\n"            // Pop the (possibly rewritten) return address
+  "nacljmp %ecx\n"
+  SIZE(X86NaClCompilationCallback)
+);
+
+void X86NaClCompilationCallback_fastcc(void);
+asm(
+  ".text\n"
+  ".align 32\n"
+  ".globl " ASMPREFIX "X86NaClCompilationCallback_fastcc\n"
+  TYPE_FUNCTION(X86NaClCompilationCallback_fastcc)
+  ASMPREFIX "X86NaClCompilationCallback_fastcc:\n"
+  "pushl %ebp\n"
+  "movl  %esp, %ebp\n"      // Standard prologue
+  "pushl %eax\n"
+  "pushl %edx\n"            // Save EAX/EDX/ECX
+  "pushl %ecx\n"
+  "andl  $-16, %esp\n"      // Align ESP on a 16-byte boundary
+  "subl  $64, %esp\n"       // Make room for the XMM arg registers
+  // FIXME: provide frame move information for the xmm registers, as above.
+  "movaps %xmm0, (%esp)\n"
+  "movaps %xmm1, 16(%esp)\n"
+  "movaps %xmm2, 32(%esp)\n"
+  "movaps %xmm3, 48(%esp)\n"
+  "subl  $16, %esp\n"
+  "movl  4(%ebp), %eax\n"   // Pass the prev frame and return address
+  "movl  %eax, 4(%esp)\n"
+  "movl  %ebp, (%esp)\n"
+  "call  " ASMPREFIX "X86NaClCompilationCallback2\n"
+  "addl  $16, %esp\n"
+  "movaps 48(%esp), %xmm3\n"
+  "movaps 32(%esp), %xmm2\n"
+  "movaps 16(%esp), %xmm1\n"
+  "movaps (%esp), %xmm0\n"
+  "movl  %ebp, %esp\n"      // Restore ESP
+  "subl  $12, %esp\n"       // Step down to the saved EAX/EDX/ECX
+  "popl  %ecx\n"
+  "popl  %edx\n"
+  "popl  %eax\n"
+  "popl  %ebp\n"
+  "popl  %eax\n"            // Pop the (possibly rewritten) return address
+  "nacljmp %eax\n"
+  SIZE(X86NaClCompilationCallback_fastcc)
+);
+#endif
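As a reading aid (not part of the patch), this is the wrapper's frame at the start of the epilogue. "movl %ebp, %esp" leaves %esp pointing at the saved %ebp, so "subl $12, %esp" steps back down over the three saved registers before the pops; the final pop then loads the return address, which X86NaClCompilationCallback2 may have rewritten to point at the stub, and nacljmp masks it and jumps:

     4(%ebp)   return address (possibly rewritten to the stub start)
      (%ebp)   saved %ebp    <- %esp after "movl %ebp, %esp"
    -4(%ebp)   saved %eax
    -8(%ebp)   saved %edx
   -12(%ebp)   saved %ecx    <- %esp after "subl $12, %esp"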
+
+/// X86NaClCompilationCallback2 - This is the target-specific function invoked
+/// by the function stub when we did not know the real target of a call. This
+/// function must locate the start of the stub or call site and pass it into
+/// the JIT compiler function.
+//
+// A stub has the following format:
+// | jump opcode (1 byte) | jump displacement (4 bytes, value 22) |
+// | 3 bytes of NOPs | 18 bytes of NOPs | 1 halt |
+// | call opcode (1 byte) | call displacement (4 bytes) |
+// The jump targets the call at the end of the bundle, and the call targets
+// the compilation callback. Once the compilation callback has JITed the
+// target function, it replaces the first 8 bytes of the stub in a single
+// atomic operation, retargeting the jump at the JITed function.
+
+static uint8_t *BundleRewriteBuffer;
+
+static void LLVM_ATTRIBUTE_USED
+X86NaClCompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
+  // Get the return address from where the call instruction left it.
+  intptr_t *RetAddrLoc = &StackPtr[1];
+  assert(*RetAddrLoc == RetAddr &&
+         "Could not find return address on the stack!");
+
+  // TODO: take a lock here. Figure out whether it has to be the JIT lock or
+  // whether it can be our own lock (or however we handle thread safety).
+#if 0
+  DEBUG(dbgs() << "In callback! Addr=" << (void*)RetAddr
+               << " ESP=" << (void*)StackPtr << "\n");
+#endif
+
+  intptr_t StubStart = RetAddr - 32;
+  // This check probably isn't necessary. The corresponding code in X86JITInfo
+  // appears to be vestigial, and AFAICT no non-stub calls to the compilation
+  // callback are generated anywhere. Still, it doesn't hurt as a sanity check.
+  bool isStub = *((unsigned char*)StubStart) == 0xE9 &&
+                *((int32_t*)(StubStart + 1)) == 22 &&
+                *((unsigned char*)(StubStart + 26)) == 0xF4;
+
+  assert(isStub && "NaCl doesn't support rewriting non-stub call sites yet");
+
+  // Back RetAddr up so it points inside the stub (so JITResolver can find
+  // which function to compile).
+  RetAddr -= 4;
+
+  intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)RetAddr);
+
+  // Rewrite the stub's jump target so that we don't end up here every time
+  // the call executes.
+
+  // Get the first 8 bytes of the stub.
+  memcpy(BundleRewriteBuffer, (void *)StubStart, 8);
+  // Point the jump at the newly JITed code.
+  *((intptr_t *)(BundleRewriteBuffer + 1)) = NewVal - (StubStart + 5);
+
+  // Copy in the new code.
+#ifdef __native_client__
+  if (nacl_dyncode_modify((void *)StubStart, BundleRewriteBuffer, 8)) {
+    report_fatal_error("dyncode_modify failed");
+  }
+#endif
+  // TODO: release the lock.
+
+  // Change our return address to execute the new jump.
+  *RetAddrLoc = StubStart;
+}
+
+} // extern "C"
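Laid out as concrete bytes (derived from the checks above and from emitFunctionStub below; the call displacement is whatever reaches the callback), the 32-byte stub bundle is:

    offset  bytes            meaning
     0      E9 16 00 00 00   jmp .+22 (lands on the call at offset 27)
     5      90 90 90         NOPs, so offset 8 is an instruction boundary
     8      90 x 18          NOPs
    26      F4               hlt (the marker tested at StubStart + 26)
    27      E8 xx xx xx xx   call to the compilation callback, ending the bundle

The 8-byte nacl_dyncode_modify above rewrites offsets 0-7 only (the jmp plus the three NOPs), which is why the stub guarantees an instruction boundary at offset 8: the atomic replacement can never split an instruction.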
+
+const int X86NaClJITInfo::kBundleSize;
+
+TargetJITInfo::LazyResolverFn
+X86NaClJITInfo::getLazyResolverFunction(JITCompilerFn F) {
+  JITCompilerFunction = F;
+  return X86NaClCompilationCallback;
+}
+
+X86NaClJITInfo::X86NaClJITInfo(X86TargetMachine &tm) : X86JITInfo(tm) {
+  // FIXME: does LLVM have some way of doing static initialization?
+#ifndef __pnacl__
+  if (posix_memalign((void **)&BundleRewriteBuffer, kBundleSize, kBundleSize))
+    report_fatal_error("Could not allocate aligned memory");
+#else
+  BundleRewriteBuffer = NULL;
+#endif
+
+  NopString = new uint8_t[kBundleSize];
+  for (int i = 0; i < kBundleSize; i++) NopString[i] = 0x90;
+  X86Hlt.ins = new uint8_t[1];
+  X86Hlt.ins[0] = 0xf4;
+  X86Hlt.len = 1;
+}
+
+X86NaClJITInfo::~X86NaClJITInfo() {
+  delete [] NopString;
+  delete [] X86Hlt.ins;
+}
+
+TargetJITInfo::StubLayout X86NaClJITInfo::getStubLayout() {
+  // NaCl stubs must be full bundles, because calls still have to end at a
+  // bundle boundary even if they never return.
+  StubLayout Result = {kBundleSize, kBundleSize};
+  return Result;
+}
+
+void *X86NaClJITInfo::emitFunctionStub(const Function* F, void *Target,
+                                       JITCodeEmitter &JCE) {
+  bool TargetsCC = Target == (void *)(intptr_t)X86NaClCompilationCallback;
+
+  // If we target the compilation callback, swap it for the variant used by
+  // functions with the fastcc calling convention.
+  if (TargetsCC && F->getCallingConv() == CallingConv::Fast) {
+    Target = (void *)(intptr_t)X86NaClCompilationCallback_fastcc;
+  }
+
+  void *Result = (void *)JCE.getCurrentPCValue();
+  assert(RoundUpToAlignment((uintptr_t)Result, kBundleSize) ==
+         (uintptr_t)Result && "Unaligned function stub");
+  if (!TargetsCC) {
+    // Jump straight to the target.
+    JCE.emitByte(0xE9);
+    JCE.emitWordLE((intptr_t)Target - JCE.getCurrentPCValue() - 4);
+    // Fill the rest of the bundle with NOPs.
+    emitNopPadding(JCE, 27);
+  } else {
+    // Jump over the next 22 bytes, to the call.
+    JCE.emitByte(0xE9);
+    JCE.emitWordLE(22);
+    // Emit 3 bytes of NOP to ensure an instruction boundary at 8 bytes.
+    emitNopPadding(JCE, 3);
+    // Emit 18 more bytes of NOP.
+    emitNopPadding(JCE, 18);
+    // Emit 1 byte of halt. This lets the compilation callback tell whether
+    // we came from a stub or not.
+    JCE.emitByte(X86Hlt.ins[0]);
+    // Emit a call to the compilation callback.
+    JCE.emitByte(0xE8);
+    JCE.emitWordLE((intptr_t)Target - JCE.getCurrentPCValue() - 4);
+  }
+  return Result;
+}
+
+// Relocations are the same as on X86, but the address being written to is not
+// the same as the address that the offset is relative to (see the comment on
+// setRelocationBuffer in X86NaClJITInfo.h).
+void X86NaClJITInfo::relocate(void *Function, MachineRelocation *MR,
+                              unsigned NumRelocs, unsigned char* GOTBase) {
+  for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+    void *RelocPos = RelocationBuffer + MR->getMachineCodeOffset();
+    void *RelocTargetPos = (char*)Function + MR->getMachineCodeOffset();
+    intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
+    switch ((X86::RelocationType)MR->getRelocationType()) {
+    case X86::reloc_pcrel_word: {
+      // PC-relative relocation: add the relocated value to the value already
+      // in memory, after adjusting it for where the PC will be.
+      ResultPtr = ResultPtr - (intptr_t)RelocTargetPos - 4 - MR->getConstantVal();
+      *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+      break;
+    }
+    case X86::reloc_picrel_word: {
+      // PIC-base-relative relocation: add the relocated value to the value
+      // already in memory, after adjusting it for where the PIC base is.
+      ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal());
+      *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+      break;
+    }
+    case X86::reloc_absolute_word:
+    case X86::reloc_absolute_word_sext:
+      // Absolute relocation: just add the relocated value to the value
+      // already in memory.
+      *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+      break;
+    case X86::reloc_absolute_dword:
+      *((intptr_t*)RelocPos) += ResultPtr;
+      break;
+    }
+  }
+}
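The pcrel case is the one place where the two-buffer scheme matters, so here is a worked example with made-up addresses (none of these values appear in the patch). Suppose the function will run at 0x20000 but its bytes are staged in a RelocationBuffer at 0x50000, and a reference at machine-code offset 0x10 resolves to ResultPtr = 0x23000 with ConstantVal = 0. Then:

    RelocPos       = 0x50000 + 0x10 = 0x50010   (the bytes actually patched)
    RelocTargetPos = 0x20000 + 0x10 = 0x20010   (run-time address of the field)
    adjustment     = 0x23000 - 0x20010 - 4 = 0x2FEC

After the buffer is copied to 0x20000, the displacement field ending at 0x20014 reaches 0x20014 + 0x2FEC = 0x23000. The displacement is computed against the final address, not against the staging buffer that gets written.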
+
+const uint8_t *X86NaClJITInfo::getNopSequence(size_t len) const {
+  // TODO(dschuff): use more efficient NOPs.
+  // Update emitNopPadding when that happens.
+  assert((int)len <= kBundleSize &&
+         "NOP sequence can't be longer than the bundle size");
+  return NopString;
+}
+
+void X86NaClJITInfo::emitNopPadding(JITCodeEmitter &JCE, size_t len) {
+  for (size_t i = 0; i < len; i++) JCE.emitByte(NopString[i]);
+}
+
+const TargetJITInfo::HaltInstruction *X86NaClJITInfo::getHalt() const {
+  return &X86Hlt;
+}
+
+int X86NaClJITInfo::getBundleSize() const {
+  return kBundleSize;
+}
+
+int32_t X86NaClJITInfo::getJumpMask() const {
+  return FlagSfiX86JmpMask;
+}
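The "more efficient NOPs" TODO in getNopSequence presumably refers to x86's multi-byte NOP encodings, which cover the same number of padding bytes with fewer instructions to decode. A minimal sketch of a table-driven filler, assuming the standard multi-byte NOP encodings from Intel's recommendations; FillNops is a hypothetical helper, not part of the patch:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Canonical 1- to 5-byte NOP encodings.
    static const uint8_t Nop1[] = {0x90};                         // nop
    static const uint8_t Nop2[] = {0x66, 0x90};                   // 66 nop
    static const uint8_t Nop3[] = {0x0F, 0x1F, 0x00};             // nop [eax]
    static const uint8_t Nop4[] = {0x0F, 0x1F, 0x40, 0x00};       // nop [eax+0]
    static const uint8_t Nop5[] = {0x0F, 0x1F, 0x44, 0x00, 0x00}; // nop [eax+eax+0]
    static const uint8_t *const Nops[] = {Nop1, Nop2, Nop3, Nop4, Nop5};

    // Fill Buf with Len bytes of padding, longest encodings first.
    static void FillNops(uint8_t *Buf, size_t Len) {
      while (Len > 0) {
        size_t N = Len < 5 ? Len : 5;
        memcpy(Buf, Nops[N - 1], N);
        Buf += N;
        Len -= N;
      }
    }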