// Various tools for parsing llvm

// Minimal line-based #if / #else / #endif preprocessing of a file's text.
// The identifier following "#if " is looked up as a property of |this| and
// lines are emitted only while the current condition is truthy. Conditions
// do not nest: #endif always switches output back on. Every emitted line is
// followed by '\n'. Any other line starting with '#' makes us throw.
function preprocess(text) {
  var output = '';
  var emitting = true;
  var sourceLines = text.split('\n');
  for (var n = 0; n < sourceLines.length; n++) {
    var current = sourceLines[n];
    if (current[0] != '#') {
      // Ordinary line: pass it through only while the condition holds.
      if (emitting) output += current + '\n';
      continue;
    }
    // Directives are told apart by a single distinguishing character.
    if (current[1] == 'i') { // #if IDENT - the ident starts after "#if "
      emitting = !!this[current.substr(4)];
    } else if (current[2] == 'l') { // #else
      emitting = !emitting;
    } else if (current[2] == 'n') { // #endif
      emitting = true;
    } else {
      throw "Unclear preprocessor command: " + current;
    }
  }
  return output;
}

// Adds one level of pointer indirection to a type string, e.g. i8 -> i8*.
function addPointing(type) {
  var pointerSuffix = '*';
  return type + pointerSuffix;
}
// Drops trailing characters (the pointer stars) from a type string.
// |num| is how many characters to drop. An explicit 0 returns the type
// untouched; any other falsy value (e.g. an omitted argument) drops one.
// The dropped characters are not checked to actually be '*'.
function removePointing(type, num) {
  if (num === 0) return type;
  var dropCount = num || 1;
  return type.substring(0, type.length - dropCount);
}

// Counts the '*' characters at the very end of a type string, i.e. how many
// levels of pointer indirection it has. A falsy type counts as 0.
function pointingLevels(type) {
  if (!type) return 0;
  var stars = 0;
  // Walk backwards from the last character while we keep seeing stars.
  for (var pos = type.length - 1; type[pos] === '*'; pos--) {
    stars++;
  }
  return stars;
}

// Turns an LLVM identifier into a name without the characters matched
// below (quotes, spaces, dots, @, %, :, <, >, commas, stars), each of which
// is replaced by an underscore. Numeric idents are returned untouched and
// 'null' becomes '0' (see parseNumerical).
function toNiceIdent(ident) {
  var looksNumeric = parseFloat(ident) == ident;
  if (looksNumeric) {
    return ident;
  } else if (ident == 'null') {
    return '0';
  }
  var unsafeChars = /[" \.@%:<>,\*]/g;
  return ident.replace(unsafeChars, '_');
}

// True for exactly the numeric LLVM types handled here:
// i1, i8, i32, i64, float and double.
function isNumberType(type) {
  switch (type) {
    case 'i1':
    case 'i8':
    case 'i32':
    case 'i64':
    case 'float':
    case 'double':
      return true;
    default:
      return false;
  }
}

function isStructPointerType(type) {
  // Anything starting with '%' that is not a number type is guessed to be a
  // struct. Per the original author, this looser test is needed for clang
  // (with llvm-gcc we could look for %struct). The cost is that %1 may be
  // either a variable or a structure, so |call i32 %5()| can end up with
  // |%5()| treated as a function type (like |i32 (i8*)| etc.); call() has
  // more context and must tell those cases apart later.
  // A similar thing happens in isStructType().
  if (isNumberType(type)) return false;
  return type[0] == '%';
}

// True for non-pointer types that are treated like structs: array blocks
// such as [15 x ?], and anything starting with '%' that is not a number
// type (the same guess as in isStructPointerType()).
function isStructType(type) {
  if (isPointerType(type)) return false;
  // A fresh regex per call, so the 'g' flag carries no state between calls.
  var arrayBlock = /^\[\d+\ x\ (.*)\]/g;
  if (arrayBlock.test(type)) return true; // [15 x ?] blocks. Like structs
  if (isNumberType(type)) return false;
  return type[0] == '%';
}

// True when the type ends in at least one '*'.
function isPointerType(type) { // TODO!
  var levels = pointingLevels(type);
  return levels > 0;
}

// True when the type is (loosely equal to) the string 'void'.
function isVoidType(type) {
  var voidName = 'void';
  return voidName == type;
}

// True when the text is recognised as any kind of type: void, a number
// type, a struct-like type or a pointer. Checks run in that order.
function isType(type) { // TODO!
  if (isVoidType(type)) return true;
  if (isNumberType(type)) return true;
  if (isStructType(type)) return true;
  return isPointerType(type);
}

// Detects a function definition, ([...|type,[type,...]]): a parenthesised
// list (optionally followed by pointer stars) that is either exactly (...)
// or whose comma-separated entries are each a single token that is a type.
function isFunctionDef(token) {
  var text = token.text;
  // Look at the text without any trailing pointer stars.
  var bare = removePointing(text, pointingLevels(text));
  var parenthesized = bare[0] == '(' && bare.substr(-1) == ')';
  if (!parenthesized) return false;
  if (bare == '(...)') return true;
  if (!token.item) return false;
  var segments = splitTokenList(token.item[0].tokens);
  for (var s = 0; s < segments.length; s++) {
    var segment = segments[s];
    // Every entry must be one lone token, and that token must be a type.
    if (!isType(segment[0].text) || segment.length > 1) return false;
  }
  return true;
}

// Copies the token's text into its |ident| property and returns the same
// (mutated) token.
function addIdent(token) {
  var text = token.text;
  token.ident = text;
  return token;
}

// Merges a non-empty list of tokens into one token whose text is the
// concatenation of their texts, whose |tokens| is a new array of the
// originals, and whose lineNum comes from the first token.
function combineTokens(tokens) {
  var firstLine = tokens[0].lineNum;
  var joinedText = '';
  var members = [];
  tokens.forEach(function(tok) {
    members.push(tok);
    joinedText += tok.text;
  });
  // Property order (lineNum, text, tokens) is kept as in the original
  // object literal, since tokens get compared via JSON.stringify.
  return {
    lineNum: firstLine,
    text: joinedText,
    tokens: members,
  };
}

// Structural comparison of two tokens that ignores their __uid__: both
// uids are temporarily set to 0, the tokens are compared through their
// JSON serialisation, and the uids are put back afterwards.
// Returns true when the serialisations are equal.
// If serialisation throws (e.g. a token with a cyclic reference), the
// exception propagates, but the uids are still restored first.
function compareTokens(a, b) {
  var aId = a.__uid__;
  var bId = b.__uid__;
  a.__uid__ = 0;
  b.__uid__ = 0;
  try {
    return JSON.stringify(a) == JSON.stringify(b);
  } finally {
    // Always undo the temporary uid change, even on failure.
    a.__uid__ = aId;
    b.__uid__ = bId;
  }
}

// Returns the index of the first token whose text equals |text|.
// When no token matches, returns tokens.length, so the result can be used
// directly as the end of a slice.
// |text| defaults to ';' when omitted: earlier versions ignored the
// argument and always searched for ';', so one-argument calls keep working.
function getTokenIndexByText(tokens, text) {
  if (text === undefined) text = ';';
  var i = 0;
  while (i < tokens.length && tokens[i].text != text) i++;
  return i;
}

// Returns the index of the first token in item.tokens whose text equals
// |text|, or -1 when there is none.
function findTokenText(item, text) {
  var idx = 0;
  while (idx < item.tokens.length) {
    if (item.tokens[idx].text == text) return idx;
    idx++;
  }
  return -1;
}

// Splits a list of tokens separated by commas. For example, a list of
// arguments in a function call.
// Returns an array of segments, each an array of the tokens between two
// commas (the comma tokens themselves are dropped). A trailing comma does
// not produce an extra empty segment. An empty list yields [].
// The input array is left untouched.
function splitTokenList(tokens) {
  if (tokens.length == 0) return [];
  var ret = [];
  var seg = [];
  tokens.forEach(function(token) {
    if (token.text == ',') {
      ret.push(seg);
      seg = [];
    } else {
      seg.push(token);
    }
  });
  // Without a trailing comma the last segment is still pending; flush it
  // here rather than appending a fake ',' token to the caller's array.
  if (tokens[tokens.length-1].text != ',') ret.push(seg);
  return ret;
}

// Splits an item, with the intent of later reintegration.
// Detaches parent[childSlot] (or a fresh empty object if that slot is
// falsy), nulls the slot on the parent, and stamps the child with what is
// needed to find its way back: the parent's __uid__, the slot name and the
// parent's line number (as both lineNum and parentLineNum). Any slots
// named in |copySlots| are copied from parent to child as well.
// Returns { parent, child }.
function splitItem(parent, childSlot, copySlots) {
  var slotsToCopy = copySlots ? copySlots : [];
  var child = parent[childSlot] ? parent[childSlot] : {};
  parent[childSlot] = null;
  var line = parent.lineNum;
  child.parentUid = parent.__uid__;
  child.parentSlot = childSlot;
  // lineNum is assigned before parentLineNum to keep property order stable.
  child.lineNum = line;
  child.parentLineNum = line;
  slotsToCopy.forEach(function(name) {
    child[name] = parent[name];
  });
  return {
    parent: parent,
    child: child,
  };
}

// Builds an object whose process(items) method puts children produced by
// splitItem() back into their parents.
// For each item carrying a parentSlot, the first item whose lineNum equals
// the child's parentLineNum is taken as its parent: the child is stored in
// parent[child.parentSlot], its parentLineNum is deleted, and
// afterFunc(parent, child) is called with |this| set to the reintegrator.
// Both entries are then nulled in |items|. Whatever remains is handed to
// this.forwardItems(..., this.name_) to be retried later. process() itself
// always returns an empty array.
// TODO: Optimize this code to optimal O(..)
// NOTE(review): splitItem() gives a child the same lineNum as its
// parentLineNum, so if a child comes before its parent in |items| the scan
// below could match the child itself - confirm callers rule this out.
function makeReintegrator(afterFunc) {
  return {
    process: function(items) {
      for (var c = 0; c < items.length; c++) {
        var child = items[c];
        if (!child || !child.parentSlot) continue;
        for (var p = 0; p < items.length; p++) {
          var candidate = items[p];
          if (!candidate || candidate.lineNum != child.parentLineNum) continue;
          // Found the pair: reattach, then let the caller post-process.
          candidate[child.parentSlot] = child;
          delete child.parentLineNum;
          afterFunc.call(this, candidate, child);

          // Both are consumed and must not be forwarded again.
          items[c] = null;
          items[p] = null;
          break;
        }
      }
      var leftovers = items.filter(function(item) { return !!item });
      this.forwardItems(leftovers, this.name_); // next time hopefully
      return [];
    }
  };
}

// Parses a comma-separated list of parameter tokens into an array of
// parameter descriptions, one per comma-delimited segment:
//   - a lone '...' token          -> { intertype: 'varargs' }
//   - a lone type token           -> a 'value' with a null value and the
//                                    implied ident '_' + (segment index)
//   - type followed by getelementptr / bitcast -> parseGetElementPtr() /
//                                    parseBitcast() of the segment
//   - anything else               -> a 'value' whose type is the first
//                                    token (with any tokens before the
//                                    last one merged into it) and whose
//                                    value/ident come from the last token
// Parameter attributes are stripped from each segment by cleanSegment().
// The |params| array itself is not modified.
// NOTE(review): merging type tokens still appends to the text of the first
// token object in place (see the TODO below), so parsing the same tokens
// twice would merge twice - confirm callers never do that.
function parseParamTokens(params) {
  if (params.length === 0) return [];
  var ret = [];
  if (params[params.length-1].text != ',') {
    // Terminate the last segment with a comma, but on a private copy so
    // the caller's token list does not grow a stray ',' token.
    params = params.slice(0);
    params.push({ text: ',' });
  }
  var absIndex = 0;
  while (params.length > 0) {
    var i = 0;
    while (params[i].text != ',') i++;
    var segment = cleanSegment(params.slice(0, i));
    params = params.slice(i+1);
    if (segment.length == 1) {
      if (segment[0].text == '...') {
        ret.push({
          intertype: 'varargs',
        });
      } else {
        // Clang sometimes has a parameter with just a type,
        // no name... the name is implied to be %{the index}
        ret.push({
          intertype: 'value',
          type: segment[0],
          value: null,
          ident: '_' + absIndex,
        });
      }
    } else if (segment[1].text === 'getelementptr') {
      ret.push(parseGetElementPtr(segment));
    } else if (segment[1].text === 'bitcast') {
      ret.push(parseBitcast(segment));
    } else {
      if (segment[2] && segment[2].text == 'to') { // part of bitcast params
        segment = segment.slice(0, 2);
      }
      // Fold everything before the last token into the type token.
      while (segment.length > 2) {
        segment[0].text += segment[1].text;
        segment.splice(1, 1); // TODO: merge tokens nicely
      }
      ret.push({
        intertype: 'value',
        type: segment[0],
        value: segment[1],
        ident: segment[1].text,
      });
    }
    absIndex ++;
  }
  return ret;
}

// Removes parameter attributes (noalias, sret, nocapture, nest, zeroext,
// signext) that directly follow the first token of a segment.
// The segment is modified in place and also returned. Segments with fewer
// than two tokens are returned as they are, including a segment that
// shrinks to a single token because everything after the type was an
// attribute (e.g. an unnamed |i8* nocapture| parameter).
function cleanSegment(segment) {
  var attributes = ['noalias', 'sret', 'nocapture', 'nest', 'zeroext', 'signext'];
  // The length check stops us from reading past the end once the last
  // attribute has been spliced out.
  while (segment.length > 1 && attributes.indexOf(segment[1].text) != -1) {
    segment.splice(1, 1);
  }
  return segment;
}

// Expects one of the several LLVM getelementptr formats:
// a qualifier, a type, a null, then an () item with tokens.
// Works on a copy of the segment with parameter attributes stripped,
// asserts that the second token is neither 'inreg' nor 'byval', and parses
// the tokens of the fourth token's first item as the parameters. The
// result's ident is the nice form of the first parameter's ident.
function parseGetElementPtr(segment) {
  var tokens = cleanSegment(segment.slice(0));
  assertTrue(['inreg', 'byval'].indexOf(tokens[1].text) == -1);
  var params = parseParamTokens(tokens[3].item[0].tokens);
  return {
    intertype: 'getelementptr',
    type: tokens[0],
    params: params,
    ident: toNiceIdent(params[0].ident),
  };
}

// TODO: use this
// Parses a bitcast segment: the first token is the type, and the tokens of
// the third token's first item are parsed as the parameters. The result's
// ident is the nice form of the first parameter's ident.
function parseBitcast(segment) {
  var params = parseParamTokens(segment[2].item[0].tokens);
  return {
    intertype: 'bitcast',
    type: segment[0],
    params: params,
    ident: toNiceIdent(params[0].ident),
  };
}

// Removes, in place, the run of tokens starting at |index| whose text is
// listed in |filterOut|. Stops at the first token not in the list, or at
// the end of the array if every remaining token was filtered out.
// Returns nothing.
function cleanOutTokens(filterOut, tokens, index) {
  // The bounds check matters when the filtered run reaches the last token.
  while (index < tokens.length && filterOut.indexOf(tokens[index].text) != -1) {
    tokens.splice(index, 1);
  }
}

// Converts a string of hexadecimal digits to a number. Both upper- and
// lower-case digits are accepted. The empty string gives 0.
// Characters are not validated: anything that is not a hex digit still
// contributes a (meaningless) value, as it always has.
function _HexToInt(stringy) {
  var ZERO = "0".charCodeAt(0);
  var UPPER_A = "A".charCodeAt(0);
  var LOWER_A = "a".charCodeAt(0);
  var ret = 0;
  var mul = 1;
  // Accumulate from the least significant (rightmost) digit.
  for (var i = stringy.length - 1; i >= 0; i--) {
    var code = stringy.charCodeAt(i);
    var base;
    if (code >= LOWER_A) {
      base = LOWER_A - 10; // 'a' counts as 10
    } else if (code >= UPPER_A) {
      base = UPPER_A - 10; // 'A' counts as 10
    } else {
      base = ZERO;
    }
    ret = ret + (mul * (code - base));
    mul = mul * 16;
  }
  return ret;
}

// Decodes LLVM's hexadecimal notation for a double, '0x' followed by the
// 16 hex digits of the IEEE 754 binary64 bit pattern, into a number.
// Handles the sign bit, zero, denormals, infinities and NaN as well as
// ordinary normalised values.
function IEEEUnHex(stringy) {
  // Split into the high and low 32-bit words; each fits a number exactly.
  var hi = parseInt(stringy.substr(2, 8), 16) || 0;
  var lo = parseInt(stringy.substr(10), 16) || 0;
  var sign = (hi >>> 31) ? -1 : 1;
  var exponent = (hi >>> 20) & 0x7ff; // 11 biased exponent bits
  // 52-bit fraction scaled to [0, 1); dividing by a power of two is exact.
  var fraction = ((hi & 0xfffff) * Math.pow(2, 32) + lo) / Math.pow(2, 52);
  if (exponent === 0x7ff) {
    return fraction ? NaN : sign * Infinity;
  }
  if (exponent === 0) {
    // Zero and denormals have no implicit leading 1.
    return sign * fraction * Math.pow(2, -1022);
  }
  return sign * (1 + fraction) * Math.pow(2, exponent - 1023);
}

// Normalises a numerical value as it appears in the LLVM source.
// When the type is unknown, 'double' or 'float' and the value starts with
// '0x', it is a hexadecimal floating point constant (as the llvm docs say,
// "The one non-intuitive notation for constants is the hexadecimal form of
// floating point constants.") and gets decoded by IEEEUnHex().
// 'null' becomes '0': NULL *is* 0 in C/C++, and a JS null would not do
// (null == 0 is false, etc.). Anything else is returned unchanged.
function parseNumerical(value, type) {
  var maybeFloat = !type || type == 'double' || type == 'float';
  if (maybeFloat && value.substr(0,2) == '0x') {
    return IEEEUnHex(value);
  }
  return value == 'null' ? '0' : value;
}

// Converts the contents of an LLVM string into an array of int values.
// A backslash introduces a two-digit hex escape, so \0Dsometext is really
// '\r', then sometext. Every other character contributes its char code.
function parseLLVMString(str) {
  var codes = [];
  for (var pos = 0; pos < str.length; ) {
    var chr = str[pos];
    if (chr == '\\') {
      // Escape: consume the backslash plus the two hex digits after it.
      codes.push(_HexToInt(str[pos+1] + str[pos+2]));
      pos += 3;
    } else {
      codes.push(chr.charCodeAt(0));
      pos += 1;
    }
  }
  return codes;
}

// Returns the ident of every label, in order.
function getLabelIds(labels) {
  function identOf(label) {
    return label.ident;
  }
  return labels.map(identOf);
}