diff options
-rw-r--r-- | cmake/modules/LLVMLibDeps.cmake | 14 | ||||
-rw-r--r-- | lib/Target/X86/CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/X86Disassembler.cpp | 438 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/X86Disassembler.h | 150 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/X86DisassemblerDecoder.c | 1361 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/X86DisassemblerDecoder.h | 515 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h | 354 | ||||
-rw-r--r-- | lib/Target/X86/Makefile | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86TargetMachine.cpp | 4 | ||||
-rw-r--r-- | utils/TableGen/CMakeLists.txt | 2 | ||||
-rw-r--r-- | utils/TableGen/DisassemblerEmitter.cpp | 99 | ||||
-rw-r--r-- | utils/TableGen/X86DisassemblerShared.h | 37 | ||||
-rw-r--r-- | utils/TableGen/X86DisassemblerTables.cpp | 603 | ||||
-rw-r--r-- | utils/TableGen/X86DisassemblerTables.h | 291 | ||||
-rw-r--r-- | utils/TableGen/X86ModRMFilters.h | 197 | ||||
-rw-r--r-- | utils/TableGen/X86RecognizableInstr.cpp | 959 | ||||
-rw-r--r-- | utils/TableGen/X86RecognizableInstr.h | 237 |
18 files changed, 5255 insertions, 12 deletions
diff --git a/cmake/modules/LLVMLibDeps.cmake b/cmake/modules/LLVMLibDeps.cmake index 6a353547ec..40003659dd 100644 --- a/cmake/modules/LLVMLibDeps.cmake +++ b/cmake/modules/LLVMLibDeps.cmake @@ -2,7 +2,7 @@ set(MSVC_LIB_DEPS_LLVMARMAsmParser LLVMARMInfo LLVMMC) set(MSVC_LIB_DEPS_LLVMARMAsmPrinter LLVMARMCodeGen LLVMARMInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMARMCodeGen LLVMARMInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMARMInfo LLVMSupport) -set(MSVC_LIB_DEPS_LLVMAlphaAsmPrinter LLVMAlphaInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMAlphaAsmPrinter LLVMAlphaCodeGen LLVMAlphaInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMAlphaCodeGen LLVMAlphaInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMAlphaInfo LLVMSupport) set(MSVC_LIB_DEPS_LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget) @@ -11,12 +11,12 @@ set(MSVC_LIB_DEPS_LLVMAsmParser LLVMCore LLVMSupport LLVMSystem) set(MSVC_LIB_DEPS_LLVMAsmPrinter LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMBitReader LLVMCore LLVMSupport LLVMSystem) set(MSVC_LIB_DEPS_LLVMBitWriter LLVMCore LLVMSupport LLVMSystem) -set(MSVC_LIB_DEPS_LLVMBlackfinAsmPrinter LLVMAsmPrinter LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMBlackfinAsmPrinter LLVMAsmPrinter LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget) set(MSVC_LIB_DEPS_LLVMBlackfinInfo LLVMSupport) set(MSVC_LIB_DEPS_LLVMCBackend LLVMAnalysis LLVMCBackendInfo LLVMCodeGen LLVMCore LLVMScalarOpts LLVMSupport 
LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa) set(MSVC_LIB_DEPS_LLVMCBackendInfo LLVMSupport) -set(MSVC_LIB_DEPS_LLVMCellSPUAsmPrinter LLVMAsmPrinter LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMCellSPUAsmPrinter LLVMAsmPrinter LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget) set(MSVC_LIB_DEPS_LLVMCellSPUInfo LLVMSupport) set(MSVC_LIB_DEPS_LLVMCodeGen LLVMAnalysis LLVMCore LLVMMC LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils) @@ -31,7 +31,7 @@ set(MSVC_LIB_DEPS_LLVMLinker LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVM set(MSVC_LIB_DEPS_LLVMMC LLVMSupport LLVMSystem) set(MSVC_LIB_DEPS_LLVMMSIL LLVMAnalysis LLVMCodeGen LLVMCore LLVMMSILInfo LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa) set(MSVC_LIB_DEPS_LLVMMSILInfo LLVMSupport) -set(MSVC_LIB_DEPS_LLVMMSP430AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMMSP430AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430CodeGen LLVMMSP430Info LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMMSP430CodeGen LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMMSP430Info LLVMSupport) set(MSVC_LIB_DEPS_LLVMMipsAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMipsCodeGen LLVMMipsInfo LLVMSupport LLVMSystem LLVMTarget) @@ -40,17 +40,17 @@ set(MSVC_LIB_DEPS_LLVMMipsInfo LLVMSupport) set(MSVC_LIB_DEPS_LLVMPIC16 LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMPIC16Info LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMPIC16AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPIC16 LLVMPIC16Info LLVMSupport LLVMSystem LLVMTarget) 
set(MSVC_LIB_DEPS_LLVMPIC16Info LLVMSupport) -set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCCodeGen LLVMPowerPCInfo LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMPowerPCCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMPowerPCInfo LLVMSupport) set(MSVC_LIB_DEPS_LLVMScalarOpts LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils) set(MSVC_LIB_DEPS_LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMSystem LLVMTarget) -set(MSVC_LIB_DEPS_LLVMSparcAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMSparcAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSparcCodeGen LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMSparcCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMSparcInfo LLVMSupport) set(MSVC_LIB_DEPS_LLVMSupport LLVMSystem) set(MSVC_LIB_DEPS_LLVMSystem ) -set(MSVC_LIB_DEPS_LLVMSystemZAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMSystemZInfo LLVMTarget) +set(MSVC_LIB_DEPS_LLVMSystemZAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMSystemZCodeGen LLVMSystemZInfo LLVMTarget) set(MSVC_LIB_DEPS_LLVMSystemZCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystemZInfo LLVMTarget) set(MSVC_LIB_DEPS_LLVMSystemZInfo LLVMSupport) set(MSVC_LIB_DEPS_LLVMTarget LLVMCore LLVMMC LLVMSupport LLVMSystem) diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 3ad65fbedc..4186fecf4e 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -3,6 +3,7 @@ 
set(LLVM_TARGET_DEFINITIONS X86.td) tablegen(X86GenRegisterInfo.h.inc -gen-register-desc-header) tablegen(X86GenRegisterNames.inc -gen-register-enums) tablegen(X86GenRegisterInfo.inc -gen-register-desc) +tablegen(X86GenDisassemblerTables.inc -gen-disassembler) tablegen(X86GenInstrNames.inc -gen-instr-enums) tablegen(X86GenInstrInfo.inc -gen-instr-desc) tablegen(X86GenAsmWriter.inc -gen-asm-writer) diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt index b329e897b9..2a83a9c268 100644 --- a/lib/Target/X86/Disassembler/CMakeLists.txt +++ b/lib/Target/X86/Disassembler/CMakeLists.txt @@ -2,5 +2,6 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/ add_llvm_library(LLVMX86Disassembler X86Disassembler.cpp + X86DisassemblerDecoder.c ) add_dependencies(LLVMX86Disassembler X86CodeGenTable_gen) diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 2ebbc9bdbd..99617e7a40 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -6,18 +6,450 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// This file is part of the X86 Disassembler. +// It contains code to translate the data produced by the decoder into +// MCInsts. +// Documentation for the disassembler can be found in X86Disassembler.h. 
+// +//===----------------------------------------------------------------------===// +#include "X86Disassembler.h" +#include "X86DisassemblerDecoder.h" +#include "X86InstrInfo.h" + +#include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCInst.h" #include "llvm/Target/TargetRegistry.h" -#include "X86.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; +using namespace llvm::X86Disassembler; + +namespace llvm { + +// Fill-ins to make the compiler happy. These constants are never actually +// assigned; they are just filler to make an automatically-generated switch +// statement work. +namespace X86 { + enum { + BX_SI = 500, + BX_DI = 501, + BP_SI = 502, + BP_DI = 503, + sib = 504, + sib64 = 505 + }; +} + +} + +static void translateInstruction(MCInst &target, + InternalInstruction &source); + +X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) : + MCDisassembler(), + fMode(mode) { +} + +X86GenericDisassembler::~X86GenericDisassembler() { +} + +/// regionReader - a callback function that wraps the readByte method from +/// MemoryObject. +/// +/// @param arg - The generic callback parameter. In this case, this should +/// be a pointer to a MemoryObject. +/// @param byte - A pointer to the byte to be read. +/// @param address - The address to be read. +static int regionReader(void* arg, uint8_t* byte, uint64_t address) { + MemoryObject* region = static_cast<MemoryObject*>(arg); + return region->readByte(address, byte); +} + +/// logger - a callback function that wraps the operator<< method from +/// raw_ostream. +/// +/// @param arg - The generic callback parameter. This should be a pointe +/// to a raw_ostream. +/// @param log - A string to be logged. logger() adds a newline. 
+static void logger(void* arg, const char* log) { + if (!arg) + return; + + raw_ostream &vStream = *(static_cast<raw_ostream*>(arg)); + vStream << log << "\n"; +} + +// +// Public interface for the disassembler +// + +bool X86GenericDisassembler::getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject &region, + uint64_t address, + raw_ostream &vStream) const { + InternalInstruction internalInstr; + + int ret = decodeInstruction(&internalInstr, + regionReader, + (void*)&region, + logger, + (void*)&vStream, + address, + fMode); + + if(ret) { + size = internalInstr.readerCursor - address; + return false; + } + else { + size = internalInstr.length; + translateInstruction(instr, internalInstr); + return true; + } +} + +// +// Private code that translates from struct InternalInstructions to MCInsts. +// + +/// translateRegister - Translates an internal register to the appropriate LLVM +/// register, and appends it as an operand to an MCInst. +/// +/// @param mcInst - The MCInst to append to. +/// @param reg - The Reg to append. +static void translateRegister(MCInst &mcInst, Reg reg) { +#define ENTRY(x) X86::x, + uint8_t llvmRegnums[] = { + ALL_REGS + 0 + }; +#undef ENTRY + + uint8_t llvmRegnum = llvmRegnums[reg]; + mcInst.addOperand(MCOperand::CreateReg(llvmRegnum)); +} + +/// translateImmediate - Appends an immediate operand to an MCInst. +/// +/// @param mcInst - The MCInst to append to. +/// @param immediate - The immediate value to append. +static void translateImmediate(MCInst &mcInst, uint64_t immediate) { + mcInst.addOperand(MCOperand::CreateImm(immediate)); +} + +/// translateRMRegister - Translates a register stored in the R/M field of the +/// ModR/M byte to its LLVM equivalent and appends it to an MCInst. +/// @param mcInst - The MCInst to append to. +/// @param insn - The internal instruction to extract the R/M field +/// from. 
+static void translateRMRegister(MCInst &mcInst, + InternalInstruction &insn) { + assert(insn.eaBase != EA_BASE_sib && insn.eaBase != EA_BASE_sib64 && + "A R/M register operand may not have a SIB byte"); + + switch (insn.eaBase) { + case EA_BASE_NONE: + llvm_unreachable("EA_BASE_NONE for ModR/M base"); + break; +#define ENTRY(x) case EA_BASE_##x: + ALL_EA_BASES +#undef ENTRY + llvm_unreachable("A R/M register operand may not have a base; " + "the operand must be a register."); + break; +#define ENTRY(x) \ + case EA_REG_##x: \ + mcInst.addOperand(MCOperand::CreateReg(X86::x)); break; + ALL_REGS +#undef ENTRY + default: + llvm_unreachable("Unexpected EA base register"); + } +} + +/// translateRMMemory - Translates a memory operand stored in the Mod and R/M +/// fields of an internal instruction (and possibly its SIB byte) to a memory +/// operand in LLVM's format, and appends it to an MCInst. +/// +/// @param mcInst - The MCInst to append to. +/// @param insn - The instruction to extract Mod, R/M, and SIB fields +/// from. +static void translateRMMemory(MCInst &mcInst, + InternalInstruction &insn) { + // Addresses in an MCInst are represented as five operands: + // 1. basereg (register) The R/M base, or (if there is a SIB) the + // SIB base + // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified + // scale amount + // 3. indexreg (register) x86_registerNONE, or (if there is a SIB) + // the index (which is multiplied by the + // scale amount) + // 4. displacement (immediate) 0, or the displacement if there is one + // 5. 
segmentreg (register) x86_registerNONE for now, but could be set + // if we have segment overrides + + MCOperand baseReg; + MCOperand scaleAmount; + MCOperand indexReg; + MCOperand displacement; + MCOperand segmentReg; + + if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { + if (insn.sibBase != SIB_BASE_NONE) { + switch (insn.sibBase) { + default: + llvm_unreachable("Unexpected sibBase"); +#define ENTRY(x) \ + case SIB_BASE_##x: \ + baseReg = MCOperand::CreateReg(X86::x); break; + ALL_SIB_BASES +#undef ENTRY + } + } else { + baseReg = MCOperand::CreateReg(0); + } + + if (insn.sibIndex != SIB_INDEX_NONE) { + switch (insn.sibIndex) { + default: + llvm_unreachable("Unexpected sibIndex"); +#define ENTRY(x) \ + case SIB_INDEX_##x: \ + indexReg = MCOperand::CreateReg(X86::x); break; + EA_BASES_32BIT + EA_BASES_64BIT +#undef ENTRY + } + } else { + indexReg = MCOperand::CreateReg(0); + } + + scaleAmount = MCOperand::CreateImm(insn.sibScale); + } else { + switch (insn.eaBase) { + case EA_BASE_NONE: + assert(insn.eaDisplacement != EA_DISP_NONE && + "EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); + + if (insn.mode == MODE_64BIT) + baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6 + else + baseReg = MCOperand::CreateReg(0); + + indexReg = MCOperand::CreateReg(0); + break; + case EA_BASE_BX_SI: + baseReg = MCOperand::CreateReg(X86::BX); + indexReg = MCOperand::CreateReg(X86::SI); + break; + case EA_BASE_BX_DI: + baseReg = MCOperand::CreateReg(X86::BX); + indexReg = MCOperand::CreateReg(X86::DI); + break; + case EA_BASE_BP_SI: + baseReg = MCOperand::CreateReg(X86::BP); + indexReg = MCOperand::CreateReg(X86::SI); + break; + case EA_BASE_BP_DI: + baseReg = MCOperand::CreateReg(X86::BP); + indexReg = MCOperand::CreateReg(X86::DI); + break; + default: + indexReg = MCOperand::CreateReg(0); + switch (insn.eaBase) { + default: + llvm_unreachable("Unexpected eaBase"); + break; + // Here, we will use the fill-ins defined above. 
However, + // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and + // sib and sib64 were handled in the top-level if, so they're only + // placeholders to keep the compiler happy. +#define ENTRY(x) \ + case EA_BASE_##x: \ + baseReg = MCOperand::CreateReg(X86::x); break; + ALL_EA_BASES +#undef ENTRY +#define ENTRY(x) case EA_REG_##x: + ALL_REGS +#undef ENTRY + llvm_unreachable("A R/M memory operand may not be a register; " + "the base field must be a base."); + break; + } + } + } + + displacement = MCOperand::CreateImm(insn.displacement); + + static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { + 0, // SEG_OVERRIDE_NONE + X86::CS, + X86::SS, + X86::DS, + X86::ES, + X86::FS, + X86::GS + }; + + segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); + + mcInst.addOperand(baseReg); + mcInst.addOperand(scaleAmount); + mcInst.addOperand(indexReg); + mcInst.addOperand(displacement); + mcInst.addOperand(segmentReg); +} + +/// translateRM - Translates an operand stored in the R/M (and possibly SIB) +/// byte of an instruction to LLVM form, and appends it to an MCInst. +/// +/// @param mcInst - The MCInst to append to. +/// @param operand - The operand, as stored in the descriptor table. +/// @param insn - The instruction to extract Mod, R/M, and SIB fields +/// from. 
+static void translateRM(MCInst &mcInst, + OperandSpecifier &operand, + InternalInstruction &insn) { + switch (operand.type) { + default: + llvm_unreachable("Unexpected type for a R/M operand"); + case TYPE_R8: + case TYPE_R16: + case TYPE_R32: + case TYPE_R64: + case TYPE_Rv: + case TYPE_MM: + case TYPE_MM32: + case TYPE_MM64: + case TYPE_XMM: + case TYPE_XMM32: + case TYPE_XMM64: + case TYPE_XMM128: + case TYPE_DEBUGREG: + case TYPE_CR32: + case TYPE_CR64: + translateRMRegister(mcInst, insn); + break; + case TYPE_M: + case TYPE_M8: + case TYPE_M16: + case TYPE_M32: + case TYPE_M64: + case TYPE_M128: + case TYPE_M512: + case TYPE_Mv: + case TYPE_M32FP: + case TYPE_M64FP: + case TYPE_M80FP: + case TYPE_M16INT: + case TYPE_M32INT: + case TYPE_M64INT: + case TYPE_M1616: + case TYPE_M1632: + case TYPE_M1664: + translateRMMemory(mcInst, insn); + break; + } +} + +/// translateFPRegister - Translates a stack position on the FPU stack to its +/// LLVM form, and appends it to an MCInst. +/// +/// @param mcInst - The MCInst to append to. +/// @param stackPos - The stack position to translate. +static void translateFPRegister(MCInst &mcInst, + uint8_t stackPos) { + assert(stackPos < 8 && "Invalid FP stack position"); + + mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos)); +} + +/// translateOperand - Translates an operand stored in an internal instruction +/// to LLVM's format and appends it to an MCInst. +/// +/// @param mcInst - The MCInst to append to. +/// @param operand - The operand, as stored in the descriptor table. +/// @param insn - The internal instruction. 
+static void translateOperand(MCInst &mcInst, + OperandSpecifier &operand, + InternalInstruction &insn) { + switch (operand.encoding) { + default: + llvm_unreachable("Unhandled operand encoding during translation"); + case ENCODING_REG: + translateRegister(mcInst, insn.reg); + break; + case ENCODING_RM: + translateRM(mcInst, operand, insn); + break; + case ENCODING_CB: + case ENCODING_CW: + case ENCODING_CD: + case ENCODING_CP: + case ENCODING_CO: + case ENCODING_CT: + llvm_unreachable("Translation of code offsets isn't supported."); + case ENCODING_IB: + case ENCODING_IW: + case ENCODING_ID: + case ENCODING_IO: + case ENCODING_Iv: + case ENCODING_Ia: + translateImmediate(mcInst, + insn.immediates[insn.numImmediatesTranslated++]); + break; + case ENCODING_RB: + case ENCODING_RW: + case ENCODING_RD: + case ENCODING_RO: + translateRegister(mcInst, insn.opcodeRegister); + break; + case ENCODING_I: + translateFPRegister(mcInst, insn.opcodeModifier); + break; + case ENCODING_Rv: + translateRegister(mcInst, insn.opcodeRegister); + break; + case ENCODING_DUP: + translateOperand(mcInst, + insn.spec->operands[operand.type - TYPE_DUP0], + insn); + break; + } +} + +/// translateInstruction - Translates an internal instruction and all its +/// operands to an MCInst. +/// +/// @param mcInst - The MCInst to populate with the instruction's data. +/// @param insn - The internal instruction. 
+static void translateInstruction(MCInst &mcInst, + InternalInstruction &insn) { + assert(insn.spec); + + mcInst.setOpcode(insn.instructionID); + + int index; + + insn.numImmediatesTranslated = 0; + + for (index = 0; index < X86_MAX_OPERANDS; ++index) { + if (insn.spec->operands[index].encoding != ENCODING_NONE) + translateOperand(mcInst, insn.spec->operands[index], insn); + } +} static const MCDisassembler *createX86_32Disassembler(const Target &T) { - return 0; + return new X86Disassembler::X86_32Disassembler; } static const MCDisassembler *createX86_64Disassembler(const Target &T) { - return 0; + return new X86Disassembler::X86_64Disassembler; } extern "C" void LLVMInitializeX86Disassembler() { diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h new file mode 100644 index 0000000000..0e6e0b0e51 --- /dev/null +++ b/lib/Target/X86/Disassembler/X86Disassembler.h @@ -0,0 +1,150 @@ +//===- X86Disassembler.h - Disassembler for x86 and x86_64 ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and +// 64-bit X86 instruction sets. The main decode sequence for an assembly +// instruction in this disassembler is: +// +// 1. Read the prefix bytes and determine the attributes of the instruction. +// These attributes, recorded in enum attributeBits +// (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM +// provides a mapping from bitmasks to contexts, which are represented by +// enum InstructionContext (ibid.). +// +// 2. Read the opcode, and determine what kind of opcode it is. 
The +// disassembler distinguishes four kinds of opcodes, which are enumerated in +// OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte +// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a +// (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context. +// +// 3. Depending on the opcode type, look in one of four ClassDecision structures +// (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which +// OpcodeDecision (ibid.) to look the opcode in. Look up the opcode, to get +// a ModRMDecision (ibid.). +// +// 4. Some instructions, such as escape opcodes or extended opcodes, or even +// instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the +// ModR/M byte to complete decode. The ModRMDecision's type is an entry from +// ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the +// ModR/M byte is required and how to interpret it. +// +// 5. After resolving the ModRMDecision, the disassembler has a unique ID +// of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in +// INSTRUCTIONS_SYM yields the name of the instruction and the encodings and +// meanings of its operands. +// +// 6. For each operand, its encoding is an entry from OperandEncoding +// (X86DisassemblerDecoderCommon.h) and its type is an entry from +// OperandType (ibid.). The encoding indicates how to read it from the +// instruction; the type indicates how to interpret the value once it has +// been read. For example, a register operand could be stored in the R/M +// field of the ModR/M byte, the REG field of the ModR/M byte, or added to +// the main opcode. This is orthogonal from its meaning (an GPR or an XMM +// register, for instance). Given this information, the operands can be +// extracted and interpreted. +// +// 7. 
As the last step, the disassembler translates the instruction information +// and operands into a format understandable by the client - in this case, an +// MCInst for use by the MC infrastructure. +// +// The disassembler is broken broadly into two parts: the table emitter that +// emits the instruction decode tables discussed above during compilation, and +// the disassembler itself. The table emitter is documented in more detail in +// utils/TableGen/X86DisassemblerEmitter.h. +// +// X86Disassembler.h contains the public interface for the disassembler, +// adhering to the MCDisassembler interface. +// X86Disassembler.cpp contains the code responsible for step 7, and for +// invoking the decoder to execute steps 1-6. +// X86DisassemblerDecoderCommon.h contains the definitions needed by both the +// table emitter and the disassembler. +// X86DisassemblerDecoder.h contains the public interface of the decoder, +// factored out into C for possible use by other projects. +// X86DisassemblerDecoder.c contains the source code of the decoder, which is +// responsible for steps 1-6. +// +//===----------------------------------------------------------------------===// + +#ifndef X86DISASSEMBLER_H +#define X86DISASSEMBLER_H + +#define INSTRUCTION_SPECIFIER_FIELDS \ + const char* name; + +#define INSTRUCTION_IDS \ + InstrUID* instructionIDs; + +#include "X86DisassemblerDecoderCommon.h" + +#undef INSTRUCTION_SPECIFIER_FIELDS +#undef INSTRUCTION_IDS + +#include "llvm/MC/MCDisassembler.h" + +struct InternalInstruction; + +namespace llvm { + +class MCInst; +class MemoryObject; +class raw_ostream; + +namespace X86Disassembler { + +/// X86GenericDisassembler - Generic disassembler for all X86 platforms. +/// All each platform class should have to do is subclass the constructor, and +/// provide a different disassemblerMode value. +class X86GenericDisassembler : public MCDisassembler { +protected: + /// Constructor - Initializes the disassembler. 
+ /// + /// @param mode - The X86 architecture mode to decode for. + X86GenericDisassembler(DisassemblerMode mode); +public: + ~X86GenericDisassembler(); + + /// getInstruction - See MCDisassembler. + bool getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject &region, + uint64_t address, + raw_ostream &vStream) const; +private: + DisassemblerMode fMode; +}; + +/// X86_16Disassembler - 16-bit X86 disassembler. +class X86_16Disassembler : public X86GenericDisassembler { +public: + X86_16Disassembler() : + X86GenericDisassembler(MODE_16BIT) { + } +}; + +/// X86_16Disassembler - 32-bit X86 disassembler. +class X86_32Disassembler : public X86GenericDisassembler { +public: + X86_32Disassembler() : + X86GenericDisassembler(MODE_32BIT) { + } +}; + +/// X86_16Disassembler - 64-bit X86 disassembler. +class X86_64Disassembler : public X86GenericDisassembler { +public: + X86_64Disassembler() : + X86GenericDisassembler(MODE_64BIT) { + } +}; + +} // namespace X86Disassembler + +} // namespace llvm + +#endif diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c new file mode 100644 index 0000000000..99ae9cdd0b --- /dev/null +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -0,0 +1,1361 @@ +/*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==* + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===* + * + * This file is part of the X86 Disassembler. + * It contains the implementation of the instruction decoder. + * Documentation for the disassembler can be found in X86Disassembler.h. 
+ * + *===----------------------------------------------------------------------===*/ + +#include <assert.h> /* for assert() */ +#include <stdarg.h> /* for va_*() */ +#include <stdio.h> /* for vsnprintf() */ +#include <stdlib.h> /* for exit() */ +#include <string.h> /* for bzero() */ + +#include "X86DisassemblerDecoder.h" + +#include "X86GenDisassemblerTables.inc" + +#define TRUE 1 +#define FALSE 0 + +#ifdef __GNUC__ +#define NORETURN __attribute__((noreturn)) +#else +#define NORETURN +#endif + +#define unreachable(s) \ + do { \ + fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, s); \ + exit(-1); \ + } while (0); + +/* + * contextForAttrs - Client for the instruction context table. Takes a set of + * attributes and returns the appropriate decode context. + * + * @param attrMask - Attributes, from the enumeration attributeBits. + * @return - The InstructionContext to use when looking up an + * an instruction with these attributes. + */ +static inline InstructionContext contextForAttrs(uint8_t attrMask) { + return CONTEXTS_SYM[attrMask]; +} + +/* + * modRMRequired - Reads the appropriate instruction table to determine whether + * the ModR/M byte is required to decode a particular instruction. + * + * @param type - The opcode type (i.e., how many bytes it has). + * @param insnContext - The context for the instruction, as returned by + * contextForAttrs. + * @param opcode - The last byte of the instruction's opcode, not counting + * ModR/M extensions and escapes. + * @return - TRUE if the ModR/M byte is required, FALSE otherwise. 
+ */ +static inline int modRMRequired(OpcodeType type, + InstructionContext insnContext, + uint8_t opcode) { + const struct ContextDecision* decision; + + switch (type) { + case ONEBYTE: + decision = &ONEBYTE_SYM; + break; + case TWOBYTE: + decision = &TWOBYTE_SYM; + break; + case THREEBYTE_38: + decision = &THREEBYTE38_SYM; + break; + case THREEBYTE_3A: + decision = &THREEBYTE3A_SYM; + break; + } + + return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. + modrm_type != MODRM_ONEENTRY; + + unreachable("Unknown opcode type"); + return 0; +} + +/* + * decode - Reads the appropriate instruction table to obtain the unique ID of + * an instruction. + * + * @param type - See modRMRequired(). + * @param insnContext - See modRMRequired(). + * @param opcode - See modRMRequired(). + * @param modRM - The ModR/M byte if required, or any value if not. + */ +static inline InstrUID decode(OpcodeType type, + InstructionContext insnContext, + uint8_t opcode, + uint8_t modRM) { + struct ModRMDecision* dec; + + switch (type) { + default: + unreachable("Unknown opcode type"); + case ONEBYTE: + dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; + break; + case TWOBYTE: + dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; + break; + case THREEBYTE_38: + dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; + break; + case THREEBYTE_3A: + dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; + break; + } + + switch (dec->modrm_type) { + default: + unreachable("Corrupt table! Unknown modrm_type"); + case MODRM_ONEENTRY: + return dec->instructionIDs[0]; + case MODRM_SPLITRM: + if (modFromModRM(modRM) == 0x3) + return dec->instructionIDs[1]; + else + return dec->instructionIDs[0]; + case MODRM_FULL: + return dec->instructionIDs[modRM]; + } + + return 0; +} + +/* + * specifierForUID - Given a UID, returns the name and operand specification for + * that instruction. 
+ * + * @param uid - The unique ID for the instruction. This should be returned by + * decode(); specifierForUID will not check bounds. + * @return - A pointer to the specification for that instruction. + */ +static inline struct InstructionSpecifier* specifierForUID(I |