Initial ARM/Thumb disassembler check-in. It consists of a tablgen backend

(RISCDisassemblerEmitter) which emits the decoder functions for ARM and Thumb, and the disassembler core which invokes the decoder function and builds up the MCInst based on the decoded Opcode. Added sub-formats to the NeonI/NeonXI instructions to further refine the NEONFrm instructions to help disassembly. We also changed the output of the addressing modes to omit the '+' from the assembler syntax #+/-<imm> or +/-<Rm>. See, for example, A8.6.57/58/60. And modified test cases to not expect '+' in +reg or #+num. For example, ; CHECK: ldr.w r9, [r7, #28] git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@98637 91177308-0d34-0410-b5e6-96231b3b80d8
author: Johnny Chen <johnny.chen@apple.com> 2010-03-16 16:36:54 +0000
committer: Johnny Chen <johnny.chen@apple.com> 2010-03-16 16:36:54 +0000
commit: d30a98e43ae18e1fc70a7dc748edf669d809c685 (patch)
tree: 808136df95741812c7297abfb604a5696499dad0 /utils/TableGen/RISCDisassemblerEmitter.cpp
parent: ea7f22c31d0d12923eaab6840322431cc0222ae9 (diff)
1 files changed, 1743 insertions, 0 deletions
diff --git a/utils/TableGen/RISCDisassemblerEmitter.cpp b/utils/TableGen/RISCDisassemblerEmitter.cpp
new file mode 100644
index 0000000000..35ad22a9dc
--- /dev/null
+++ b/utils/TableGen/RISCDisassemblerEmitter.cpp
@@ -0,0 +1,1743 @@
+//===- RISCDisassemblerEmitter.cpp - Disassembler Generator ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// FIXME: document
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "risc-disassembler-emitter"
+
+#include "RISCDisassemblerEmitter.h"
+#include "CodeGenTarget.h"
+#include "Record.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <iomanip>
+#include <vector>
+#include <cstdio>
+#include <map>
+#include <string>
+#include <sstream>
+
+using namespace llvm;
+
+////////////////////////////////////
+//  Utility classes / structures  //
+////////////////////////////////////
+
+// LLVM coding style
+#define INDENT_LEVEL 2
+
+/// Indenter - A little helper class to keep track of the indentation depth,
+/// while the instance object is being passed around.
+class Indenter {
+public:
+  Indenter() : depth(0) {}
+
+  void push() {
+    depth += INDENT_LEVEL;
+  }
+
+  void pop() {
+    if (depth >= INDENT_LEVEL)
+      depth -= INDENT_LEVEL;
+  }
+
+  // Conversion operator.
+  operator int () {
+    return depth;
+  }
+private:
+  uint8_t depth;
+};
+
+/////////////////////////
+//  Utility functions  //
+/////////////////////////
+
+static uint8_t byteFromBitsInit(BitsInit &init) {
+  int width = init.getNumBits();
+
+  assert(width <= 8 && "Field is too large for uint8_t!");
+
+  int index;
+  uint8_t mask = 0x01;
+
+  uint8_t ret = 0;
+
+  for (index = 0; index < width; index++) {
+    if (static_cast<BitInit*>(init.getBit(index))->getValue())
+      ret |= mask;
+
+    mask <<= 1;
+  }
+
+  return ret;
+}
+
+static uint8_t getByteField(const Record &def, const char *str) {
+  BitsInit *bits = def.getValueAsBitsInit(str);
+  return byteFromBitsInit(*bits);
+}
+
+static BitsInit &getBitsField(const Record &def, const char *str) {
+  BitsInit *bits = def.getValueAsBitsInit(str);
+  return *bits;
+}
+
+/// sameStringExceptEndingChar - Return true if the two strings differ only in
+/// the ending char.  ("VST4q8a", "VST4q8b", 'a', 'b') as input returns true.
+static
+bool sameStringExceptEndingChar(const std::string &LHS, const std::string &RHS,
+                                char lhc, char rhc) {
+
+  if (LHS.length() > 1 && RHS.length() > 1 && LHS.length() == RHS.length()) {
+    unsigned length = LHS.length();
+    return LHS.substr(0, length - 1) == RHS.substr(0, length - 1)
+      && LHS[length - 1] == lhc && RHS[length - 1] == rhc;
+  }
+
+  return false;
+}
+
+/// thumbInstruction - Determine whether we have a Thumb instruction.
+/// See also ARMInstrFormats.td.
+static bool thumbInstruction(uint8_t Form) {
+  return Form == 23;
+}
+
+// The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system
+// for a bit value.
+//
+// BIT_UNFILTERED is used as the init value for a filter position.  It is used
+// only for filter processings.
+typedef enum {
+  BIT_TRUE,      // '1'
+  BIT_FALSE,     // '0'
+  BIT_UNSET,     // '?'
+  BIT_UNFILTERED // unfiltered
+} bit_value_t;
+
+static bit_value_t bitFromBits(BitsInit &bits, unsigned index) {
+  if (BitInit *bit = dynamic_cast<BitInit*>(bits.getBit(index)))
+    return bit->getValue() ? BIT_TRUE : BIT_FALSE;
+
+  // The bit is uninitialized.
+  return BIT_UNSET;
+}
+
+static void dumpBits(raw_ostream &o, BitsInit &bits) {
+  unsigned index;
+
+  for (index = bits.getNumBits(); index > 0; index--) {
+    switch (bitFromBits(bits, index - 1)) {
+    case BIT_TRUE:
+      o << "1";
+      break;
+    case BIT_FALSE:
+      o << "0";
+      break;
+    case BIT_UNSET:
+      o << "_";
+      break;
+    default:
+      assert(0 && "unexpected return value from bitFromBits");
+    }
+  }
+}
+
+/////////////
+//  Enums  //
+/////////////
+
+#define ARM_FORMATS                   \
+  ENTRY(ARM_FORMAT_PSEUDO,         0) \
+  ENTRY(ARM_FORMAT_MULFRM,         1) \
+  ENTRY(ARM_FORMAT_BRFRM,          2) \
+  ENTRY(ARM_FORMAT_BRMISCFRM,      3) \
+  ENTRY(ARM_FORMAT_DPFRM,          4) \
+  ENTRY(ARM_FORMAT_DPSOREGFRM,     5) \
+  ENTRY(ARM_FORMAT_LDFRM,          6) \
+  ENTRY(ARM_FORMAT_STFRM,          7) \
+  ENTRY(ARM_FORMAT_LDMISCFRM,      8) \
+  ENTRY(ARM_FORMAT_STMISCFRM,      9) \
+  ENTRY(ARM_FORMAT_LDSTMULFRM,    10) \
+  ENTRY(ARM_FORMAT_ARITHMISCFRM,  11) \
+  ENTRY(ARM_FORMAT_EXTFRM,        12) \
+  ENTRY(ARM_FORMAT_VFPUNARYFRM,   13) \
+  ENTRY(ARM_FORMAT_VFPBINARYFRM,  14) \
+  ENTRY(ARM_FORMAT_VFPCONV1FRM,   15) \
+  ENTRY(ARM_FORMAT_VFPCONV2FRM,   16) \
+  ENTRY(ARM_FORMAT_VFPCONV3FRM,   17) \
+  ENTRY(ARM_FORMAT_VFPCONV4FRM,   18) \
+  ENTRY(ARM_FORMAT_VFPCONV5FRM,   19) \
+  ENTRY(ARM_FORMAT_VFPLDSTFRM,    20) \
+  ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 21) \
+  ENTRY(ARM_FORMAT_VFPMISCFRM,    22) \
+  ENTRY(ARM_FORMAT_THUMBFRM,      23) \
+  ENTRY(ARM_FORMAT_NEONFRM,       24) \
+  ENTRY(ARM_FORMAT_NEONGETLNFRM,  25) \
+  ENTRY(ARM_FORMAT_NEONSETLNFRM,  26) \
+  ENTRY(ARM_FORMAT_NEONDUPFRM,    27) \
+  ENTRY(ARM_FORMAT_LDSTEXFRM,     28) \
+  ENTRY(ARM_FORMAT_MISCFRM,       29) \
+  ENTRY(ARM_FORMAT_THUMBMISCFRM,  30)
+
+// ARM instruction format specifies the encoding used by the instruction.
+#define ENTRY(n, v) n = v,
+typedef enum {
+  ARM_FORMATS
+  ARM_FORMAT_NA
+} ARMFormat;
+#undef ENTRY
+
+// Converts enum to const char*.
+static const char *stringForARMFormat(ARMFormat form) {
+#define ENTRY(n, v) case n: return #n;
+  switch(form) {
+    ARM_FORMATS
+  case ARM_FORMAT_NA:
+  default:
+    return "";
+  }
+#undef ENTRY
+}
+
+#define NS_FORMATS                              \
+  ENTRY(NS_FORMAT_NONE,                     0)  \
+  ENTRY(NS_FORMAT_VLDSTLane,                1)  \
+  ENTRY(NS_FORMAT_VLDSTLaneDbl,             2)  \
+  ENTRY(NS_FORMAT_VLDSTRQ,                  3)  \
+  ENTRY(NS_FORMAT_NVdImm,                   4)  \
+  ENTRY(NS_FORMAT_NVdVmImm,                 5)  \
+  ENTRY(NS_FORMAT_NVdVmImmVCVT,             6)  \
+  ENTRY(NS_FORMAT_NVdVmImmVDupLane,         7)  \
+  ENTRY(NS_FORMAT_NVdVmImmVSHLL,            8)  \
+  ENTRY(NS_FORMAT_NVectorShuffle,           9)  \
+  ENTRY(NS_FORMAT_NVectorShift,             10) \
+  ENTRY(NS_FORMAT_NVectorShift2,            11) \
+  ENTRY(NS_FORMAT_NVdVnVmImm,               12) \
+  ENTRY(NS_FORMAT_NVdVnVmImmVectorShift,    13) \
+  ENTRY(NS_FORMAT_NVdVnVmImmVectorExtract,  14) \
+  ENTRY(NS_FORMAT_NVdVnVmImmMulScalar,      15) \
+  ENTRY(NS_FORMAT_VTBL,                     16)
+
+// NEON instruction sub-format further classify the NEONFrm instruction.
+#define ENTRY(n, v) n = v,
+typedef enum {
+  NS_FORMATS
+  NS_FORMAT_NA
+} NSFormat;
+#undef ENTRY
+
+// Converts enum to const char*.
+static const char *stringForNSFormat(NSFormat form) {
+#define ENTRY(n, v) case n: return #n;
+  switch(form) {
+    NS_FORMATS
+  case NS_FORMAT_NA:
+  default:
+    return "";
+  }
+#undef ENTRY
+}
+
+// Enums for the available target names.
+typedef enum {
+  TARGET_ARM = 0,
+  TARGET_THUMB
+} TARGET_NAME_t;
+
+class AbstractFilterChooser {
+public:
+  static TARGET_NAME_t TargetName;
+  static void setTargetName(TARGET_NAME_t tn) { TargetName = tn; }
+  virtual ~AbstractFilterChooser() {}
+  virtual void emitTop(raw_ostream &o, Indenter &i) = 0;
+  virtual void emitBot(raw_ostream &o, Indenter &i) = 0;
+};
+
+// Define the symbol here.
+TARGET_NAME_t AbstractFilterChooser::TargetName;
+
+template <unsigned tBitWidth>
+class FilterChooser : public AbstractFilterChooser {
+protected:
+  // Representation of the instruction to work on.
+  typedef bit_value_t insn_t[tBitWidth];
+
+  class Filter {
+  protected:
+    FilterChooser *Owner; // pointer without ownership
+    unsigned StartBit; // the starting bit position
+    unsigned NumBits; // number of bits to filter
+    bool Mixed; // a mixed region contains both set and unset bits
+
+    // Map of well-known segment value to the set of uid's with that value. 
+    std::map<uint64_t, std::vector<unsigned> > FilteredInstructions;
+
+    // Set of uid's with non-constant segment values.
+    std::vector<unsigned> VariableInstructions;
+
+    // Map of well-known segment value to its delegate.
+    std::map<unsigned, FilterChooser> FilterChooserMap;
+
+    // Number of instructions which fall under FilteredInstructions category.
+    unsigned NumFiltered;
+
+    // Keeps track of the last opcode in the filtered bucket.
+    unsigned LastOpcFiltered;
+
+    // Number of instructions which fall under VariableInstructions category.
+    unsigned NumVariable;
+
+  public:
+    unsigned getNumFiltered() { return NumFiltered; }
+    unsigned getNumVariable() { return NumVariable; }
+    unsigned getSingletonOpc() {
+      assert(NumFiltered == 1);
+      return LastOpcFiltered;
+    }
+    FilterChooser &getVariableFC() {
+      assert(NumFiltered == 1);
+      assert(FilterChooserMap.size() == 1);
+      return FilterChooserMap.find(-1)->second;
+    }
+
+    Filter(const Filter &f) :
+      Owner(f.Owner),
+      StartBit(f.StartBit),
+      NumBits(f.NumBits),
+      Mixed(f.Mixed),
+      FilteredInstructions(f.FilteredInstructions),
+      VariableInstructions(f.VariableInstructions),
+      FilterChooserMap(f.FilterChooserMap),
+      NumFiltered(f.NumFiltered),
+      LastOpcFiltered(f.LastOpcFiltered),
+      NumVariable(f.NumVariable) { }
+
+    Filter(FilterChooser &owner, unsigned startBit, unsigned numBits,
+           bool mixed) :
+      Owner(&owner),
+      StartBit(startBit),
+      NumBits(numBits),
+      Mixed(mixed)
+    {
+      assert(StartBit + NumBits - 1 < tBitWidth);
+
+      NumFiltered = 0;
+      LastOpcFiltered = 0;
+      NumVariable = 0;
+
+      for (unsigned i = 0, e = Owner->Opcodes.size(); i != e; ++i) {
+        insn_t Insn;
+
+        // Populates the insn given the uid.
+        Owner->insnWithID(Insn, Owner->Opcodes[i]);
+
+        uint64_t Field;
+        // Scans the segment for possibly well-specified encoding bits.
+        bool ok = Owner->fieldFromInsn(Field, Insn, StartBit, NumBits);
+
+        if (ok) {
+          // The encoding bits are well-known.  Lets add the uid of the
+          // instruction into the bucket keyed off the constant field value.
+          LastOpcFiltered = Owner->Opcodes[i];
+          FilteredInstructions[Field].push_back(LastOpcFiltered);
+          ++NumFiltered;
+        } else {
+          // Some of the encoding bit(s) are unspecfied.  This contributes to
+          // one additional member of "Variable" instructions.
+          VariableInstructions.push_back(Owner->Opcodes[i]);
+          ++NumVariable;
+        }
+      }
+
+      assert((FilteredInstructions.size() + VariableInstructions.size() > 0)
+             && "Filter returns no instruction categories");
+    }
+
+    // Divides the decoding task into sub tasks and delegates them to the
+    // inferior FilterChooser's.
+    //
+    // A special case arises when there's only one entry in the filtered
+    // instructions.  In order to unambiguously decode the singleton, we need to
+    // match the remaining undecoded encoding bits against the singleton.
+    void recurse() {
+      std::map<uint64_t, std::vector<unsigned> >::const_iterator mapIterator;
+
+      bit_value_t BitValueArray[tBitWidth];
+      // Starts by inheriting our parent filter chooser's filter bit values.
+      memcpy(BitValueArray, Owner->FilterBitValues, sizeof(BitValueArray));
+
+      unsigned bitIndex;
+
+      if (VariableInstructions.size()) {
+        // Conservatively marks each segment position as BIT_UNSET.
+        for (bitIndex = 0; bitIndex < NumBits; bitIndex++)
+          BitValueArray[StartBit + bitIndex] = BIT_UNSET;
+
+        // Delegates to an inferior filter chooser for futher processing on this
+        // group of instructions whose segment values are variable.
+        FilterChooserMap.insert(std::pair<unsigned, FilterChooser>(
+                                  (unsigned)-1,
+                                  FilterChooser(Owner->AllInstructions,
+                                                VariableInstructions,
+                                                BitValueArray,
+                                                *Owner)
+                                  ));
+      }
+
+      // No need to recurse for a singleton filtered instruction.
+      // See also Filter::emit().
+      if (getNumFiltered() == 1) {
+        //Owner->SingletonExists(LastOpcFiltered);
+        assert(FilterChooserMap.size() == 1);
+        return;
+      }
+        
+      // Otherwise, create sub choosers.
+      for (mapIterator = FilteredInstructions.begin();
+           mapIterator != FilteredInstructions.end();
+           mapIterator++) {
+
+        // Marks all the segment positions with either BIT_TRUE or BIT_FALSE.
+        for (bitIndex = 0; bitIndex < NumBits; bitIndex++) {
+          if (mapIterator->first & (1 << bitIndex))
+            BitValueArray[StartBit + bitIndex] = BIT_TRUE;
+          else
+            BitValueArray[StartBit + bitIndex] = BIT_FALSE;
+        }
+
+        // Delegates to an inferior filter chooser for futher processing on this
+        // category of instructions.
+        FilterChooserMap.insert(std::pair<unsigned, FilterChooser>(
+                                  mapIterator->first,
+                                  FilterChooser(Owner->AllInstructions,
+                                                mapIterator->second,
+                                                BitValueArray,
+                                                *Owner)
+                                  ));
+      }
+    }
+
+    // Emit code to decode instructions given a segment or segments of bits.
+    void emit(raw_ostream &o, Indenter &i) {
+      o.indent(i) << "// Check Inst{";
+
+      if (NumBits > 1)
+        o << (StartBit + NumBits - 1) << '-';
+
+      o << StartBit << "} ...\n";
+
+      o.indent(i) << "switch (fieldFromInstruction(insn, "
+                  << StartBit << ", " << NumBits << ")) {\n";
+
+      typename std::map<unsigned, FilterChooser>::iterator filterIterator;
+
+      bool DefaultCase = false;
+      for (filterIterator = FilterChooserMap.begin();
+           filterIterator != FilterChooserMap.end();
+           filterIterator++) {
+
+        // Field value -1 implies a non-empty set of variable instructions.
+        // See also recurse().
+        if (filterIterator->first == (unsigned)-1) {
+          DefaultCase = true;
+
+          o.indent(i) << "default:\n";
+          o.indent(i) << "  break; // fallthrough\n";
+
+          // Closing curly brace for the switch statement.
+          // This is unconventional because we want the default processing to be
+          // performed for the fallthrough cases as well, i.e., when the "cases"
+          // did not prove a decoded instruction.
+          o.indent(i) << "}\n";
+
+        } else {
+          o.indent(i) << "case " << filterIterator->first << ":\n";
+        }
+
+        // We arrive at a category of instructions with the same segment value.
+        // Now delegate to the sub filter chooser for further decodings.
+        // The case may fallthrough, which happens if the remaining well-known
+        // encoding bits do not match exactly.
+        if (!DefaultCase) i.push();
+        {
+          bool finished = filterIterator->second.emit(o, i);
+          // For top level default case, there's no need for a break statement.
+          if (Owner->isTopLevel() && DefaultCase)
+            break;
+          if (!finished)
+            o.indent(i) << "break;\n";
+        }
+        if (!DefaultCase) i.pop();
+      }
+
+      // If there is no default case, we still need to supply a closing brace.
+      if (!DefaultCase) {
+        // Closing curly brace for the switch statement.
+        o.indent(i) << "}\n";
+      }
+    }
+
+    // Returns the number of fanout produced by the filter.  More fanout implies
+    // the filter distinguishes more categories of instructions.
+    unsigned usefulness() const {
+      if (VariableInstructions.size())
+        return FilteredInstructions.size();
+      else
+        return FilteredInstructions.size() + 1;
+    }
+  }; // End of inner class Filter
+
+  friend class Filter;
+
+  // Vector of codegen instructions to choose our filter.
+  const std::vector<const CodeGenInstruction*> &AllInstructions;
+
+  // Vector of uid's for this filter chooser to work on.
+  const std::vector<unsigned> Opcodes;
+
+  // Vector of candidate filters.
+  std::vector<Filter> Filters;
+
+  // Array of bit values passed down from our parent.
+  // Set to all BIT_UNFILTERED's for Parent == NULL.
+  bit_value_t FilterBitValues[tBitWidth];
+
+  // Links to the FilterChooser above us in the decoding tree.
+  FilterChooser *Parent;
+  
+  // Index of the best filter from Filters.
+  int BestIndex;
+
+public:
+  FilterChooser(const FilterChooser &FC) :
+    AbstractFilterChooser(),
+    AllInstructions(FC.AllInstructions),
+    Opcodes(FC.Opcodes),
+    Filters(FC.Filters),
+    Parent(FC.Parent),
+    BestIndex(FC.BestIndex)
+  {
+    memcpy(FilterBitValues, FC.FilterBitValues, sizeof(FilterBitValues));
+  }
+
+  FilterChooser(const std::vector<const CodeGenInstruction*> &Insts,
+                const std::vector<unsigned> &IDs) :
+    AllInstructions(Insts),
+    Opcodes(IDs),
+    Filters(),
+    Parent(NULL),
+    BestIndex(-1)
+  {
+    for (unsigned i = 0; i < tBitWidth; ++i)
+      FilterBitValues[i] = BIT_UNFILTERED;
+
+    doFilter();
+  }
+
+  FilterChooser(const std::vector<const CodeGenInstruction*> &Insts,
+                const std::vector<unsigned> &IDs,
+                bit_value_t (&ParentFilterBitValues)[tBitWidth],
+                FilterChooser &parent) :
+    AllInstructions(Insts),
+    Opcodes(IDs),
+    Filters(),
+    Parent(&parent),
+    BestIndex(-1)
+  {
+    for (unsigned i = 0; i < tBitWidth; ++i)
+      FilterBitValues[i] = ParentFilterBitValues[i];
+
+    doFilter();
+  }
+
+  // The top level filter chooser has NULL as its parent.
+  bool isTopLevel() { return Parent == NULL; }
+
+  // This provides an opportunity for target specific code emission.
+  void emitTopHook(raw_ostream &o, Indenter &i) {
+    if (TargetName == TARGET_ARM) {
+      // Emit code that references the ARMFormat data type.
+      o << "static const ARMFormat ARMFormats[] = {\n";
+      for (unsigned i = 0, e = AllInstructions.size(); i != e; ++i) {
+        const Record &Def = *(AllInstructions[i]->TheDef);
+        const std::string &Name = Def.getName();
+        if (Def.isSubClassOf("InstARM") || Def.isSubClassOf("InstThumb"))
+          o.indent(2) << 
+            stringForARMFormat((ARMFormat)getByteField(Def, "Form"));
+        else
+          o << "  ARM_FORMAT_NA";
+
+        o << ",\t// Inst #" << i << " = " << Name << '\n';
+      }
+      o << "  ARM_FORMAT_NA\t// Unreachable.\n";
+      o << "};\n\n";
+
+      // And emit code that references the NSFormat data type.
+      // This is meaningful only for NEONFrm instructions.
+      o << "static const NSFormat NSFormats[] = {\n";
+      for (unsigned i = 0, e = AllInstructions.size(); i != e; ++i) {
+        const Record &Def = *(AllInstructions[i]->TheDef);
+        const std::string &Name = Def.getName();
+        if (Def.isSubClassOf("NeonI") || Def.isSubClassOf("NeonXI"))
+          o.indent(2) << 
+            stringForNSFormat((NSFormat)getByteField(Def, "NSForm"));
+        else
+          o << "  NS_FORMAT_NA";
+
+        o << ",\t// Inst #" << i << " = " << Name << '\n';
+      }
+      o << "  NS_FORMAT_NA\t// Unreachable.\n";
+      o << "};\n\n";
+    }
+  }
+
+  // Emit the top level typedef and decodeInstruction() function.
+  void emitTop(raw_ostream &o, Indenter &i) {
+
+    // Run the target specific emit hook.
+    emitTopHook(o, i);
+
+    switch(tBitWidth) {
+    case 8:
+      o.indent(i) << "typedef uint8_t field_t;\n";
+      break;
+    case 16:
+      o.indent(i) << "typedef uint16_t field_t;\n";
+      break;
+    case 32:
+      o.indent(i) << "typedef uint32_t field_t;\n";
+      break;
+    case 64:
+      o.indent(i) << "typedef uint64_t field_t;\n";
+      break;
+    default:
+      assert(0 && "Unexpected instruction size!");
+    }
+
+    o << '\n';
+
+    o.indent(i) << "static field_t " <<
+    "fieldFromInstruction(field_t insn, unsigned startBit, unsigned numBits)\n";
+
+    o.indent(i) << "{\n";
+    i.push();
+    {
+      o.indent(i) << "assert(startBit + numBits <= " << tBitWidth
+                  << " && \"Instruction field out of bounds!\");\n";
+      o << '\n';
+      o.indent(i) << "field_t fieldMask;\n";
+      o << '\n';
+      o.indent(i) << "if (numBits == " << tBitWidth << ")\n";
+
+      i.push();
+      {
+        o.indent(i) << "fieldMask = (field_t)-1;\n";
+      }
+      i.pop();
+
+      o.indent(i) << "else\n";
+
+      i.push();
+      {
+        o.indent(i) << "fieldMask = ((1 << numBits) - 1) << startBit;\n";
+      }
+      i.pop();
+
+      o << '\n';
+      o.indent(i) << "return (insn & fieldMask) >> startBit;\n";
+    }
+    i.pop();
+    o.indent(i) << "}\n";
+
+    o << '\n';
+
+    o.indent(i) << "static uint16_t decodeInstruction(field_t insn) {\n";
+
+    i.push();
+    {
+      // Emits code to decode the instructions.
+      emit(o, i);
+
+      o << '\n';
+      o.indent(i) << "return 0;\n";
+    }
+    i.pop();
+
+    o.indent(i) << "}\n";
+
+    o << '\n';
+
+  }
+
+  // This provides an opportunity for target specific code emission after
+  // emitTop().
+  void emitBot(raw_ostream &o, Indenter &i) {
+    if (TargetName == TARGET_THUMB) {
+      // Emit code that decodes the Thumb ISA.
+      o.indent(i)
+        << "static uint16_t decodeThumbInstruction(field_t insn) {\n";
+
+      i.push();
+      {
+        // Emits code to decode the instructions.
+        emit(o, i);
+
+        o << '\n';
+        o.indent(i) << "return 0;\n";
+      }
+      i.pop();
+
+      o.indent(i) << "}\n";
+    }
+  }
+
+protected:
+  // Populates the insn given the uid.
+  void insnWithID(insn_t &Insn, unsigned Opcode) const {
+    assert(Opcode > 10);
+    BitsInit &Bits = getBitsField(*AllInstructions[Opcode]->TheDef, "Inst");
+
+    for (unsigned i = 0; i < tBitWidth; ++i)
+      Insn[i] = bitFromBits(Bits, i);
+  }
+
+  // Returns the record name.
+  const std::string &nameWithID(unsigned Opcode) const {
+    return AllInstructions[Opcode]->TheDef->getName();
+  }
+
+  // Populates the field of the insn given the start position and the number of
+  // consecutive bits to scan for.
+  //
+  // Returns false if and on the first uninitialized bit value encountered.
+  // Returns true, otherwise.
+  const bool fieldFromInsn(uint64_t &Field, insn_t &Insn, unsigned StartBit,
+                           unsigned NumBits) const {
+    Field = 0;
+
+    for (unsigned i = 0; i < NumBits; ++i) {
+      if (Insn[StartBit + i] == BIT_UNSET)
+        return false;
+
+      if (Insn[StartBit + i] == BIT_TRUE)
+        Field = Field | (1 << i);
+    }
+
+    return true;
+  }
+
+  void dumpFilterArray(raw_ostream &o, bit_value_t (&filter)[tBitWidth]) {
+    unsigned bitIndex;
+
+    for (bitIndex = tBitWidth; bitIndex > 0; bitIndex--) {
+      switch (filter[bitIndex - 1]) {
+      case BIT_UNFILTERED:
+        o << ".";
+        break;
+      case BIT_UNSET:
+        o << "_";
+        break;
+      case BIT_TRUE:
+        o << "1";
+        break;
+      case BIT_FALSE:
+        o << "0";
+        break;
+      }
+    }
+  }
+
+  void dumpStack(raw_ostream &o, const char *prefix) {
+    FilterChooser *current = this;
+
+    while (current) {
+      o << prefix;
+
+      dumpFilterArray(o, current->FilterBitValues);
+
+      o << '\n';
+
+      current = current->Parent;
+    }
+  }
+
+  Filter &bestFilter() {
+    assert(BestIndex != -1 && "BestIndex not set");
+    return Filters[BestIndex];
+  }
+
+  // States of our finite state machines.
+  typedef enum {
+    ATTR_NONE,
+    ATTR_FILTERED,
+    ATTR_ALL_SET,
+    ATTR_ALL_UNSET,
+    ATTR_MIXED
+  } bitAttr_t;
+
+  // Called from Filter::recurse() when singleton exists.  For debug purpose.
+  void SingletonExists(unsigned Opc) {
+
+    insn_t Insn0;
+    insnWithID(Insn0, Opc);
+
+    errs() << "Singleton exists: " << nameWithID(Opc)
+           << " with its decoding dominating ";
+    for (unsigned i = 0; i < Opcodes.size(); ++i) {
+      if (Opcodes[i] == Opc) continue;
+      errs() << nameWithID(Opcodes[i]) << ' ';
+    }
+    errs() << '\n';
+
+    dumpStack(errs(), "\t\t");
+    for (unsigned i = 0; i < Opcodes.size(); i++) {
+      const std::string &Name = nameWithID(Opcodes[i]);
+
+      errs() << '\t' << Name << " ";
+      dumpBits(errs(),
+               getBitsField(*AllInstructions[Opcodes[i]]->TheDef, "Inst"));
+      errs() << '\n';
+    }
+  }
+
+  bool ValueSet(bit_value_t V) {
+    return (V == BIT_TRUE || V == BIT_FALSE);
+  }
+  bool ValueNotSet(bit_value_t V) {
+    return (V == BIT_UNSET);
+  }
+  int Value(bit_value_t V) {
+    return ValueNotSet(V) ? -1 : (V == BIT_FALSE ? 0 : 1);
+  }
+  bool PositionFiltered(unsigned i) {
+    return ValueSet(FilterBitValues[i]);
+  }
+
+  // Calculates the island(s) needed to decode the instruction.
+  unsigned getIslands(std::vector<unsigned> &StartBits,
+                      std::vector<unsigned> &EndBits,
+                      std::vector<uint64_t> &FieldVals, insn_t &Insn)
+  {
+    unsigned Num, BitNo;
+    Num = BitNo = 0;
+
+    uint64_t FieldVal = 0;
+
+    // 0: Init
+    // 1: Water
+    // 2: Island
+    int State = 0;
+    int Val = -1;
+
+    for (unsigned i = 0; i < tBitWidth; ++i) {
+      Val = Value(Insn[i]);
+      bool Filtered = PositionFiltered(i);
+      switch (State) {
+      default:
+        assert(0 && "Unreachable code!");
+        break;
+      case 0:
+      case 1:
+        if (Filtered || Val == -1)
+          State = 1; // Still in Water
+        else {
+          State = 2; // Into the Island
+          BitNo = 0;
+          StartBits.push_back(i);
+          FieldVal = Val;
+        }
+        break;
+      case 2:
+        if (Filtered || Val == -1) {
+          State = 1; // Into the Water
+          EndBits.push_back(i - 1);
+          FieldVals.push_back(FieldVal);
+          ++Num;
+        } else {
+          State = 2; // Still in Island
+          ++BitNo;
+          FieldVal = FieldVal | Val << BitNo;
+        }
+        break;
+      }
+    }
+    // If we are still in Island after the loop, do some housekeeping.
+    if (State == 2) {
+      EndBits.push_back(tBitWidth - 1);
+      FieldVals.push_back(FieldVal);
+      ++Num;
+    }
+
+    /*
+    printf("StartBits.size()=%u,EndBits.size()=%u,FieldVals.size()=%u,Num=%u\n",
+          (unsigned)StartBits.size(), (unsigned)EndBits.size(),
+          (unsigned)FieldVals.size(), Num);
+    */
+
+    assert(StartBits.size() == Num && EndBits.size() == Num &&
+           FieldVals.size() == Num);
+
+    return Num;
+  }
+
+  bool LdStCopEncoding1(unsigned Opc) {
+    const std::string &Name = nameWithID(Opc);
+    if (Name == "LDC_OFFSET" || Name == "LDC_OPTION" ||
+        Name == "LDC_POST" || Name == "LDC_PRE" ||
+        Name == "LDCL_OFFSET" || Name == "LDCL_OPTION" ||
+        Name == "LDCL_POST" || Name == "LDCL_PRE" ||
+        Name == "STC_OFFSET" || Name == "STC_OPTION" ||
+        Name == "STC_POST" || Name == "STC_PRE" ||
+        Name == "STCL_OFFSET" || Name == "STCL_OPTION" ||
+        Name == "STCL_POST" || Name == "STCL_PRE")
+      return true;
+    else
+      return false;
+  }
+
+  // Emits code to decode the singleton.  Return true if we have matched all the
+  // well-known bits.
+  bool emitSingletonDecoder(raw_ostream &o, Indenter &i, unsigned Opc) {
+
+    std::vector<unsigned> StartBits;
+    std::vector<unsigned> EndBits;
+    std::vector<uint64_t> FieldVals;
+    insn_t Insn;
+    insnWithID(Insn, Opc);
+
+    if (TargetName == TARGET_ARM && LdStCopEncoding1(Opc)) {
+      o.indent(i);
+      // A8.6.51 & A8.6.188
+      // If coproc = 0b101?, i.e, slice(insn, 11, 8) = 10 or 11, escape.
+      o << "if (fieldFromInstruction(insn, 9, 3) == 5) break; // fallthrough\n";
+    }
+
+    // Look for islands of undecoded bits of the singleton.
+    getIslands(StartBits, EndBits, FieldVals, Insn);
+
+    unsigned Size = StartBits.size();
+    unsigned I, NumBits;
+
+    // If we have matched all the well-known bits, just issue a return.
+    if (Size == 0) {
+      o.indent(i) << "return " << Opc << "; // " << nameWithID(Opc) << '\n';
+      return true;
+    }
+
+    // Otherwise, there are more decodings to be done!
+
+    // Emit code to match the island(s) for the singleton.
+    o.indent(i) << "// Check ";
+
+    for (I = Size; I != 0; --I) {
+      o << "Inst{" << EndBits[I-1] << '-' << StartBits[I-1] << "} ";
+      if (I > 1)
+        o << "&& ";
+      else
+        o << "for singleton decoding...\n";
+    }
+
+    o.indent(i) << "if (";
+
+    for (I = Size; I != 0; --I) {
+      NumBits = EndBits[I-1] - StartBits[I-1] + 1;
+      o << "fieldFromInstruction(insn, " << StartBits[I-1] << ", " << NumBits
+        << ") == " << FieldVals[I-1];
+      if (I > 1)
+        o << " && ";
+      else
+        o << ")\n";
+    }
+
+    o.indent(i) << "  return " << Opc << "; // " << nameWithID(Opc) << '\n';
+
+    return false;
+  }
+
+  // Emits code to decode the singleton, and then to decode the rest.
+  void emitSingletonDecoder(raw_ostream &o, Indenter &i, Filter & Best) {
+
+    unsigned Opc = Best.getSingletonOpc();
+
+    emitSingletonDecoder(o, i, Opc);
+
+    // Emit code for the rest.
+    o.indent(i) << "else\n";
+    i.push();
+    {
+      Best.getVariableFC().emit(o, i);
+    }
+    i.pop();
+  }
+
+  // Assign a single filter and run with it.
+  void runSingleFilter(FilterChooser &owner, unsigned startBit, unsigned numBit,
+                       bool mixed) {
+    Filters.clear();
+    Filter F(*this, startBit, numBit, true);
+    Filters.push_back(F);
+    BestIndex = 0; // Sole Filter instance to choose from.
+    bestFilter().recurse();
+  }
+
+  bool filterProcessor(bool AllowMixed, bool Greedy = true) {
+    Filters.clear();
+    BestIndex = -1;
+    unsigned numInstructions = Opcodes.size();
+
+    assert(numInstructions && "Filter created with no instructions");
+
+    // No further filtering is necessary.
+    if (numInstructions == 1)
+      return true;
+
+    // Heuristics.  See also doFilter()'s "Heuristics" comment when num of
+    // instructions is 3.
+    if (AllowMixed && !Greedy) {
+      assert(numInstructions == 3);
+
+      for (unsigned i = 0; i < Opcodes.size(); ++i) {
+        std::vector<unsigned> StartBits;
+        std::vector<unsigned> EndBits;
+        std::vector<uint64_t> FieldVals;
+        insn_t Insn;
+
+        insnWithID(Insn, Opcodes[i]);
+
+        // Look for islands of undecoded bits of any instruction.
+        if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) {
+          // Found an instruction with island(s).  Now just assign a filter.
+          runSingleFilter(*this, StartBits[0], EndBits[0] - StartBits[0] + 1,
+                          true);
+          return true;
+        }
+      }
+    }
+
+    unsigned bitIndex, insnIndex;
+
+    // We maintain tBitWidth copies of the bitAttrs automaton.
+    // The automaton consumes the corresponding bit from each
+    // instruction.
+    //
+    //   Input symbols: 0, 1, and _ (unset).
+    //   States:        NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED.
+    //   Initial state: NONE.
+    //
+    // (NONE) ------- [01] -> (ALL_SET)
+    // (NONE) ------- _ ----> (ALL_UNSET)
+    // (ALL_SET) ---- [01] -> (ALL_SET)
+    // (ALL_SET) ---- _ ----> (MIXED)
+    // (ALL_UNSET) -- [01] -> (MIXED)
+    // (ALL_UNSET) -- _ ----> (ALL_UNSET)
+    // (MIXED) ------ . ----> (MIXED)
+    // (FILTERED)---- . ----> (FILTERED)
+
+    bitAttr_t bitAttrs[tBitWidth];
+
+    // FILTERED bit positions provide no entropy and are not worthy of pursuing.
+    // Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position.
+    for (bitIndex = 0; bitIndex < tBitWidth; ++bitIndex)
+      if (FilterBitValues[bitIndex] == BIT_TRUE ||
+          FilterBitValues[bitIndex] == BIT_FALSE)
+        bitAttrs[bitIndex] = ATTR_FILTERED;
+      else
+        bitAttrs[bitIndex] = ATTR_NONE;
+
+    for (insnIndex = 0; insnInd
author	Johnny Chen <johnny.chen@apple.com>	2010-03-16 16:36:54 +0000
committer	Johnny Chen <johnny.chen@apple.com>	2010-03-16 16:36:54 +0000
commit	d30a98e43ae18e1fc70a7dc748edf669d809c685 (patch)
tree	808136df95741812c7297abfb604a5696499dad0 /utils/TableGen/RISCDisassemblerEmitter.cpp
parent	ea7f22c31d0d12923eaab6840322431cc0222ae9 (diff)