4 files changed, 1116 insertions, 0 deletions
diff --git a/lib/Target/JSBackend/CMakeLists.txt b/lib/Target/JSBackend/CMakeLists.txt
index 099a45ff08..37704f316f 100644
--- a/lib/Target/JSBackend/CMakeLists.txt
+++ b/lib/Target/JSBackend/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_llvm_target(JSBackendCodeGen
+  ExpandI64.cpp
   JSBackend.cpp
   Relooper.cpp
   SimplifyAllocas.cpp
diff --git a/lib/Target/JSBackend/ExpandI64.cpp b/lib/Target/JSBackend/ExpandI64.cpp
new file mode 100644
index 0000000000..cc63d01c7f
--- /dev/null
+++ b/lib/Target/JSBackend/ExpandI64.cpp
@@ -0,0 +1,1112 @@
+//===- ExpandI64.cpp - Expand i64 and wider integer types -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===------------------------------------------------------------------===//
+//
+// This pass expands and lowers all operations on integers i64 and wider
+// into 32-bit operations that can be handled by JS in a natural way.
+//
+// 64-bit variables become pairs of 2 32-bit variables, for the low and
+// high 32 bit chunks. This happens for both registers and function
+// arguments. Function return values become a return of the low 32 bits
+// and a store of the high 32-bits in tempRet0, a global helper variable.
+// Larger values become more chunks of 32 bits. Currently we require that
+// types be a multiple of 32 bits.
+//
+// Many operations then become simple pairs of operations, for example
+// bitwise AND becomes and AND of each 32-bit chunk. More complex operations
+// like addition are lowered into calls into library support code in
+// Emscripten (i64Add for example).
+//
+//===------------------------------------------------------------------===//
+
+#include "OptPasses.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <map>
+
+#include "llvm/Support/raw_ostream.h"
+
+#ifdef NDEBUG
+#undef assert
+#define assert(x) { if (!(x)) report_fatal_error(#x); }
+#endif
+
+using namespace llvm;
+
+namespace {
+
+  typedef SmallVector<Value*, 2> ChunksVec;
+  typedef std::map<Value*, ChunksVec> SplitsMap;
+
+  typedef SmallVector<PHINode *, 8> PHIVec;
+  typedef SmallVector<Instruction *, 8> DeadVec;
+
+  // This is a ModulePass because the pass recreates functions in
+  // order to expand i64 arguments to pairs of i32s.
+  class ExpandI64 : public ModulePass {
+    bool Changed;
+    DataLayout *DL;
+    Module *TheModule;
+
+    SplitsMap Splits; // old illegal value to new insts
+    PHIVec Phis;
+
+    // If the function has an illegal return or argument, create a legal version
+    void ensureLegalFunc(Function *F);
+
+    // If a function is illegal, remove it
+    void removeIllegalFunc(Function *F);
+
+    // splits an illegal instruction into 32-bit chunks. We do
+    // not yet have the values yet, as they depend on other
+    // splits, so store the parts in Splits, for FinalizeInst.
+    bool splitInst(Instruction *I);
+
+    // For an illegal value, returns the split out chunks
+    // representing the low and high parts, that splitInst
+    // generated.
+    // The value can also be a constant, in which case we just
+    // split it, or a function argument, in which case we
+    // map to the proper legalized new arguments
+    ChunksVec getChunks(Value *V);
+
+    Function *Add, *Sub, *Mul, *SDiv, *UDiv, *SRem, *URem, *LShr, *AShr, *Shl, *GetHigh, *SetHigh, *FtoILow, *FtoIHigh, *DtoILow, *DtoIHigh, *SItoF, *UItoF, *SItoD, *UItoD, *BItoD, *BDtoILow, *BDtoIHigh;
+
+    void ensureFuncs();
+    unsigned getNumChunks(Type *T);
+
+  public:
+    static char ID;
+    ExpandI64() : ModulePass(ID) {
+      initializeExpandI64Pass(*PassRegistry::getPassRegistry());
+
+      Add = Sub = Mul = SDiv = UDiv = SRem = URem = LShr = AShr = Shl = GetHigh = SetHigh = NULL;
+    }
+
+    virtual bool runOnModule(Module &M);
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+  };
+}
+
+char ExpandI64::ID = 0;
+INITIALIZE_PASS(ExpandI64, "expand-illegal-ints",
+                "Expand and lower illegal >i32 operations into 32-bit chunks",
+                false, false)
+
+// Utilities
+
+static Instruction *CopyDebug(Instruction *NewInst, Instruction *Original) {
+  NewInst->setDebugLoc(Original->getDebugLoc());
+  return NewInst;
+}
+
+static bool isIllegal(Type *T) {
+  return T->isIntegerTy() && T->getIntegerBitWidth() > 32;
+}
+
+static FunctionType *getLegalizedFunctionType(FunctionType *FT) {
+  SmallVector<Type*, 0> ArgTypes; // XXX
+  int Num = FT->getNumParams();
+  for (int i = 0; i < Num; i++) {
+    Type *T = FT->getParamType(i);
+    if (!isIllegal(T)) {
+      ArgTypes.push_back(T);
+    } else {
+      Type *i32 = Type::getInt32Ty(FT->getContext());
+      ArgTypes.push_back(i32);
+      ArgTypes.push_back(i32);
+    }
+  }
+  Type *RT = FT->getReturnType();
+  Type *NewRT;
+  if (!isIllegal(RT)) {
+    NewRT = RT;
+  } else {
+    NewRT = Type::getInt32Ty(FT->getContext());
+  }
+  return FunctionType::get(NewRT, ArgTypes, false);
+}
+
+// Implementation of ExpandI64
+
+static bool okToRemainIllegal(Function *F) {
+  StringRef Name = F->getName();
+  if (Name == "llvm.dbg.value") return true;
+
+  // XXX EMSCRIPTEN: These take an i64 immediate argument; since they're not
+  // real instructions, we don't need to legalize them.
+  if (Name == "llvm.lifetime.start") return true;
+  if (Name == "llvm.lifetime.end") return true;
+  if (Name == "llvm.invariant.start") return true;
+  if (Name == "llvm.invariant.end") return true;
+
+  return false;
+}
+
+unsigned ExpandI64::getNumChunks(Type *T) {
+  unsigned Num = DL->getTypeSizeInBits(T);
+  return (Num + 31) / 32;
+}
+
+static bool isLegalFunctionType(FunctionType *FT) {
+  if (isIllegal(FT->getReturnType())) {
+    return false;
+  }
+
+  int Num = FT->getNumParams();
+  for (int i = 0; i < Num; i++) {
+    if (isIllegal(FT->getParamType(i))) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+static bool isLegalInstruction(const Instruction *I) {
+  if (isIllegal(I->getType())) {
+    return false;
+  }
+
+  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+    if (isIllegal(I->getOperand(i)->getType())) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// We can't use RecreateFunction because we need to handle
+// function and argument attributes specially.
+static Function *RecreateFunctionLegalized(Function *F, FunctionType *NewType) {
+  Function *NewFunc = Function::Create(NewType, F->getLinkage());
+
+  AttributeSet Attrs = F->getAttributes();
+  AttributeSet FnAttrs = Attrs.getFnAttributes();
+
+  // Legalizing the return value is done by storing part of the value into
+  // static storage. Subsequent analysis will see this as a memory access,
+  // so we can no longer claim to be readonly or readnone.
+  if (isIllegal(F->getReturnType())) {
+    FnAttrs = FnAttrs.removeAttribute(F->getContext(),
+                                      AttributeSet::FunctionIndex,
+                                      Attribute::ReadOnly);
+    FnAttrs = FnAttrs.removeAttribute(F->getContext(),
+                                      AttributeSet::FunctionIndex,
+                                      Attribute::ReadNone);
+  }
+
+  NewFunc->addAttributes(AttributeSet::FunctionIndex, FnAttrs);
+  NewFunc->addAttributes(AttributeSet::ReturnIndex, Attrs.getRetAttributes());
+  Function::arg_iterator AI = F->arg_begin();
+
+  // We need to recreate the attribute set, with the right indexes
+  AttributeSet NewAttrs;
+  unsigned NumArgs = F->arg_size();
+  for (unsigned i = 1, j = 1; i < NumArgs+1; i++, j++, AI++) {
+    if (isIllegal(AI->getType())) {
+      j++;
+      continue;
+    }
+    if (!Attrs.hasAttributes(i)) continue;
+    AttributeSet ParamAttrs = Attrs.getParamAttributes(i);
+    AttrBuilder AB;
+    unsigned NumSlots = ParamAttrs.getNumSlots();
+    for (unsigned k = 0; k < NumSlots; k++) {
+      for (AttributeSet::iterator I = ParamAttrs.begin(k), E = ParamAttrs.end(k); I != E; I++) {
+        AB.addAttribute(*I);
+      }
+    }
+    NewFunc->addAttributes(j, AttributeSet::get(F->getContext(), j, AB));
+  }
+
+  F->getParent()->getFunctionList().insert(F, NewFunc);
+  NewFunc->takeName(F);
+  NewFunc->getBasicBlockList().splice(NewFunc->begin(),
+                                      F->getBasicBlockList());
+  F->replaceAllUsesWith(
+      ConstantExpr::getBitCast(NewFunc,
+                               F->getFunctionType()->getPointerTo()));
+  return NewFunc;
+}
+
+void ExpandI64::ensureLegalFunc(Function *F) {
+  if (okToRemainIllegal(F)) return;
+
+  FunctionType *FT = F->getFunctionType();
+  if (isLegalFunctionType(FT)) return;
+
+  Changed = true;
+  Function *NF = RecreateFunctionLegalized(F, getLegalizedFunctionType(FT));
+  std::string Name = NF->getName();
+  if (strncmp(Name.c_str(), "llvm.", 5) == 0) {
+    // this is an intrinsic, and we are changing its signature, which will annoy LLVM, so rename
+    const size_t len = Name.size();
+    SmallString<256> NewName;
+    NewName.resize(len);
+    for (unsigned i = 0; i < len; i++) {
+      NewName[i] = Name[i] != '.' ? Name[i] : '_';
+    }
+    NF->setName(Twine(NewName));
+  }
+
+  // Move and update arguments
+  for (Function::arg_iterator Arg = F->arg_begin(), E = F->arg_end(), NewArg = NF->arg_begin();
+       Arg != E; ++Arg) {
+    if (Arg->getType() == NewArg->getType()) {
+      NewArg->takeName(Arg);
+      Arg->replaceAllUsesWith(NewArg);
+      NewArg++;
+    } else {
+      // This was legalized
+      ChunksVec &Chunks = Splits[&*Arg];
+      int Num = getNumChunks(Arg->getType());
+      assert(Num == 2);
+      for (int i = 0; i < Num; i++) {
+        Chunks.push_back(&*NewArg);
+        if (NewArg->hasName()) Chunks[i]->setName(NewArg->getName() + "$" + utostr(i));
+        NewArg++;
+      }
+    }
+  }
+}
+
+void ExpandI64::removeIllegalFunc(Function *F) {
+  if (okToRemainIllegal(F)) return;
+
+  FunctionType *FT = F->getFunctionType();
+  if (!isLegalFunctionType(FT)) {
+    F->eraseFromParent();
+  }
+}
+
+bool ExpandI64::splitInst(Instruction *I) {
+  Type *i32 = Type::getInt32Ty(I->getContext());
+  Type *i32P = i32->getPointerTo();
+  Type *i64 = Type::getInt64Ty(I->getContext());
+  Value *Zero  = Constant::getNullValue(i32);
+
+  ChunksVec &Chunks = Splits[I];
+
+  switch (I->getOpcode()) {
+    case Instruction::GetElementPtr: {
+      GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
+      SmallVector<Value*, 2> NewOps;
+      for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i) {
+        Value *Op = I->getOperand(i);
+        if (isIllegal(Op->getType())) {
+          // Truncate the operand down to one chunk.
+          NewOps.push_back(getChunks(Op)[0]);
+        } else {
+          NewOps.push_back(Op);
+        }
+      }
+      Value *NewGEP = CopyDebug(GetElementPtrInst::Create(GEP->getPointerOperand(), NewOps, "", GEP), GEP);
+      Chunks.push_back(NewGEP);
+      I->replaceAllUsesWith(NewGEP);
+      break;
+    }
+    case Instruction::SExt: {
+      ChunksVec InputChunks;
+      Value *Op = I->getOperand(0);
+      if (isIllegal(Op->getType())) {
+        InputChunks = getChunks(Op);
+      } else {
+        InputChunks.push_back(Op);
+      }
+
+      for (unsigned i = 0, e = InputChunks.size(); i != e; ++i) {
+        Value *Input = InputChunks[i];
+
+        Type *T = Input->getType();
+        Value *Chunk;
+        if (T->getIntegerBitWidth() < 32) {
+          Chunk = CopyDebug(new SExtInst(Input, i32, "", I), I);
+        } else {
+          assert(T->getIntegerBitWidth() == 32);
+          Chunk = Input;
+        }
+        Chunks.push_back(Chunk);
+      }
+
+      Instruction *Check = CopyDebug(new ICmpInst(I, ICmpInst::ICMP_SLT, Chunks.back(), Zero), I);
+      int Num = getNumChunks(I->getType());
+      for (int i = Chunks.size(); i < Num; i++) {
+        Instruction *High = CopyDebug(new SExtInst(Check, i32, "", I), I);
+        Chunks.push_back(High);
+      }
+      break;
+    }
+    case Instruction::PtrToInt: 
+    case Instruction::ZExt: {
+      Value *Op = I->getOperand(0);
+      ChunksVec InputChunks;
+      if (I->getOpcode() == Instruction::PtrToInt) {
+        InputChunks.push_back(CopyDebug(new PtrToIntInst(Op, i32, "", I), I));
+      } else if (isIllegal(Op->getType())) {
+        InputChunks = getChunks(Op);
+      } else {
+        InputChunks.push_back(Op);
+      }
+
+      for (unsigned i = 0, e = InputChunks.size(); i != e; ++i) {
+        Value *Input = InputChunks[i];
+        Type *T = Input->getType();
+
+        Value *Chunk;
+        if (T->getIntegerBitWidth() < 32) {
+          Chunk = CopyDebug(new ZExtInst(Input, i32, "", I), I);
+        } else {
+          assert(T->getIntegerBitWidth() == 32);
+          Chunk = Input;
+        }
+        Chunks.push_back(Chunk);
+      }
+
+      int Num = getNumChunks(I->getType());
+      for (int i = Chunks.size(); i < Num; i++) {
+        Chunks.push_back(Zero);
+      }
+      break;
+    }
+    case Instruction::IntToPtr:
+    case Instruction::Trunc: {
+      unsigned Num = getNumChunks(I->getType());
+      unsigned NumBits = DL->getTypeSizeInBits(I->getType());
+      ChunksVec InputChunks = getChunks(I->getOperand(0));
+      for (unsigned i = 0; i < Num; i++) {
+        Value *Input = InputChunks[i];
+
+        Value *Chunk;
+        if (NumBits < 32) {
+          Chunk = CopyDebug(new TruncInst(Input, IntegerType::get(I->getContext(), NumBits), "", I), I);
+          NumBits = 0;
+        } else {
+          Chunk = Input;
+          NumBits -= 32;
+        }
+        if (I->getOpcode() == Instruction::IntToPtr) {
+          assert(i == 0);
+          Chunk = CopyDebug(new IntToPtrInst(Chunk, I->getType(), "", I), I);
+        }
+        Chunks.push_back(Chunk);
+      }
+      if (!isIllegal(I->getType())) {
+        assert(Chunks.size() == 1);
+        I->replaceAllUsesWith(Chunks[0]);
+      }
+      break;
+    }
+    case Instruction::Load: {
+      LoadInst *LI = cast<LoadInst>(I);
+      Instruction *AI = CopyDebug(new PtrToIntInst(LI->getPointerOperand(), i32, "", I), I);
+      int Num = getNumChunks(I->getType());
+      for (int i = 0; i < Num; i++) {
+        Instruction *Add = i == 0 ? AI : CopyDebug(BinaryOperator::Create(Instruction::Add, AI, ConstantInt::get(i32, 4*i), "", I), I);
+        Instruction *Ptr = CopyDebug(new IntToPtrInst(Add, i32P, "", I), I);
+        LoadInst *Chunk = new LoadInst(Ptr, "", I); CopyDebug(Chunk, I);
+        Chunk->setAlignment(MinAlign(LI->getAlignment() == 0 ?
+                                         DL->getABITypeAlignment(LI->getType()) :
+                                         LI->getAlignment(),
+                                     4*i));
+        Chunk->setVolatile(LI->isVolatile());
+        Chunk->setOrdering(LI->getOrdering());
+        Chunk->setSynchScope(LI->getSynchScope());
+        Chunks.push_back(Chunk);
+      }
+      break;
+    }
+    case Instruction::Store: {
+      StoreInst *SI = cast<StoreInst>(I);
+      Instruction *AI = CopyDebug(new PtrToIntInst(SI->getPointerOperand(), i32, "", I), I);
+      ChunksVec InputChunks = getChunks(SI->getValueOperand());
+      int Num = InputChunks.size();
+      for (int i = 0; i < Num; i++) {
+        Instruction *Add = i == 0 ? AI : CopyDebug(BinaryOperator::Create(Instruction::Add, AI, ConstantInt::get(i32, 4*i), "", I), I);
+        Instruction *Ptr = CopyDebug(new IntToPtrInst(Add, i32P, "", I), I);
+        StoreInst *Chunk = new StoreInst(InputChunks[i], Ptr, I);
+        Chunk->setAlignment(MinAlign(SI->getAlignment() == 0 ?
+                                         DL->getABITypeAlignment(SI->getValueOperand()->getType()) :
+                                         SI->getAlignment(),
+                                     4*i));
+        Chunk->setVolatile(SI->isVolatile());
+        Chunk->setOrdering(SI->getOrdering());
+        Chunk->setSynchScope(SI->getSynchScope());
+        CopyDebug(Chunk, I);
+      }
+      break;
+    }
+    case Instruction::Ret: {
+      assert(I->getOperand(0)->getType() == i64);
+      ChunksVec InputChunks = getChunks(I->getOperand(0));
+      ensureFuncs();
+      SmallVector<Value *, 1> Args;
+      Args.push_back(InputChunks[1]);
+      CopyDebug(CallInst::Create(SetHigh, Args, "", I), I);
+      CopyDebug(ReturnInst::Create(I->getContext(), InputChunks[0], I), I);
+      break;
+    }
+    case Instruction::Add:
+    case Instruction::Sub:
+    case Instruction::Mul:
+    case Instruction::SDiv:
+    case Instruction::UDiv:
+    case Instruction::SRem:
+    case Instruction::URem:
+    case Instruction::LShr:
+    case Instruction::AShr:
+    case Instruction::Shl: {
+      ChunksVec LeftChunks = getChunks(I->getOperand(0));
+      ChunksVec RightChunks = getChunks(I->getOperand(1));
+      unsigned Num = getNumChunks(I->getType());
+      if (Num == 2) {
+        ensureFuncs();
+        Value *Low = NULL, *High = NULL;
+        Function *F = NULL;
+        switch (I->getOpcode()) {
+          case Instruction::Add:  F = Add;  break;
+          case Instruction::Sub:  F = Sub;  break;
+          case Instruction::Mul:  F = Mul;  break;
+          case Instruction::SDiv: F = SDiv; break;
+          case Instruction::UDiv: F = UDiv; break;
+          case Instruction::SRem: F = SRem; break;
+          case Instruction::URem: F = URem; break;
+          case Instruction::AShr: F = AShr; break;
+          case Instruction::LShr: {
+            if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+              unsigned Shifts = CI->getZExtValue();
+              if (Shifts == 32) {
+                Low = LeftChunks[1];
+                High = Zero;
+                break;
+              }
+            }
+            F = LShr;
+            break;
+          }
+          case Instruction::Shl: {
+            if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+              const APInt &Shifts = CI->getValue();
+              if (Shifts == 32) {
+                Low = Zero;
+                High = LeftChunks[0];
+                break;
+              }
+            }
+            F = Shl;
+            break;
+          }
+          default: assert(0);
+        }
+        if (F) {
+          // use a library call, no special optimization was found
+          SmallVector<Value *, 4> Args;
+          Args.push_back(LeftChunks[0]);
+          Args.push_back(LeftChunks[1]);
+          Args.push_back(RightChunks[0]);
+          Args.push_back(RightChunks[1]);
+          Low = CopyDebug(CallInst::Create(F, Args, "", I), I);
+          High = CopyDebug(CallInst::Create(GetHigh, "", I), I);
+        }
+        Chunks.push_back(Low);
+        Chunks.push_back(High);
+      } else {
+        // more than 64 bits. handle simple shifts for lshr and shl
+        assert(I->getOpcode() == Instruction::LShr || I->getOpcode() == Instruction::AShr || I->getOpcode() == Instruction::Shl);
+        ConstantInt *CI = cast<ConstantInt>(I->getOperand(1));
+        unsigned Shifts = CI->getZExtValue();
+        unsigned Fraction = Shifts % 32;
+        Constant *Frac = ConstantInt::get(i32, Fraction);
+        Constant *Comp = ConstantInt::get(i32, 32 - Fraction);
+        Instruction::BinaryOps Opcode, Reverse;
+        unsigned ShiftChunks, Dir;
+        Value *TopFiller = Zero;
+        if (I->getOpcode() == Instruction::Shl) {
+          Opcode = Instruction::Shl;
+          Reverse = Instruction::LShr;
+          ShiftChunks = -(Shifts/32);
+          Dir = -1;
+        } else {
+          Opcode = Instruction::LShr;
+          Reverse = Instruction::Shl;
+          ShiftChunks = Shifts/32;
+          Dir = 1;
+          if (I->getOpcode() == Instruction::AShr) {
+            Value *Cond = CopyDebug(new ICmpInst(I, ICmpInst::ICMP_SLT, LeftChunks[LeftChunks.size()-1], Zero), I);
+            TopFiller = CopyDebug(SelectInst::Create(Cond, ConstantInt::get(i32, -1), Zero, "", I), I);
+          }
+        }
+        for (unsigned i = 0; i < Num; i++) {
+          Value *L;
+          if (i + ShiftChunks < LeftChunks.size()) {
+            L = LeftChunks[i + ShiftChunks];
+          } else {
+            L = Zero;
+          }
+
+          Value *H;
+          if (i + ShiftChunks + Dir < LeftChunks.size()) {
+            H = LeftChunks[i + ShiftChunks + Dir];
+          } else {
+            H = TopFiller;
+          }
+
+          // shifted the fractional amount
+          if (Frac != Zero && L != Zero) {
+            if (Fraction == 32) {
+              L = Zero;
+            } else {
+              L = CopyDebug(BinaryOperator::Create(Opcode, L, Frac, "", I), I);
+            }
+          }
+          // shifted the complement-fractional amount to the other side
+          if (Comp != Zero && H != Zero) {
+            if (Fraction == 0) {
+              H = TopFiller;
+            } else {
+              H = CopyDebug(BinaryOperator::Create(Reverse, H, Comp, "", I), I);
+            }
+          }
+
+          // Or the parts together. Since we may have zero, try to fold it away.
+          if (Value *V = SimplifyBinOp(Instruction::Or, L, H, DL)) {
+            Chunks.push_back(V);
+          } else {
+            Chunks.push_back(CopyDebug(BinaryOperator::Create(Instruction::Or, L, H, "", I), I));
+          }
+        }
+      }
+      break;
+    }
+    case Instruction::ICmp: {
+      ICmpInst *CE = cast<ICmpInst>(I);
+      ICmpInst::Predicate Pred = CE->getPredicate();
+      ChunksVec LeftChunks = getChunks(I->getOperand(0));
+      ChunksVec RightChunks = getChunks(I->getOperand(1));
+      switch (Pred) {
+        case ICmpInst::ICMP_EQ:
+        case ICmpInst::ICMP_NE: {
+          ICmpInst::Predicate PartPred; // the predicate to use on each of the parts
+          llvm::Instruction::BinaryOps CombineOp; // the predicate to use to combine the subcomparisons
+          int Num = LeftChunks.size();
+          if (Pred == ICmpInst::ICMP_EQ) {
+            PartPred = ICmpInst::ICMP_EQ;
+            CombineOp = Instruction::And;
+          } else {
+            PartPred = ICmpInst::ICMP_NE;
+            CombineOp = Instruction::Or;
+          }
+          // first combine 0 and 1. then combine that with 2, etc.
+          Value *Combined = NULL;
+          for (int i = 0; i < Num; i++) {
+            Value *Cmp = CopyDebug(new ICmpInst(I, PartPred, LeftChunks[i], RightChunks[i]), I);
+            Combined = !Combined ? Cmp : CopyDebug(BinaryOperator::Create(CombineOp, Combined, Cmp, "", I), I);
+          }
+          I->replaceAllUsesWith(Combined);
+          break;
+        }
+        case ICmpInst::ICMP_ULT:
+        case ICmpInst::ICMP_SLT:
+        case ICmpInst::ICMP_UGT:
+        case ICmpInst::ICMP_SGT:
+        case ICmpInst::ICMP_ULE:
+        case ICmpInst::ICMP_SLE:
+        case ICmpInst::ICMP_UGE:
+        case ICmpInst::ICMP_SGE: {
+          assert(I->getOperand(0)->getType() == i64);
+          Instruction *A, *B, *C, *D, *Final;
+          ICmpInst::Predicate StrictPred = Pred;
+          ICmpInst::Predicate UnsignedPred = Pred;
+          switch (Pred) {
+            case ICmpInst::ICMP_ULE: StrictPred = ICmpInst::ICMP_ULT; break;
+            case ICmpInst::ICMP_UGE: StrictPred = ICmpInst::ICMP_UGT; break;
+            case ICmpInst::ICMP_SLE: StrictPred = ICmpInst::ICMP_SLT; UnsignedPred = ICmpInst::ICMP_ULE; break;
+            case ICmpInst::ICMP_SGE: StrictPred = ICmpInst::ICMP_SGT; UnsignedPred = ICmpInst::ICMP_UGE; break;
+            case ICmpInst::ICMP_SLT:                                  UnsignedPred = ICmpInst::ICMP_ULT; break;
+            case ICmpInst::ICMP_SGT:                                  UnsignedPred = ICmpInst::ICMP_UGT; break;
+            case ICmpInst::ICMP_ULT: break;
+            case ICmpInst::ICMP_UGT: break;
+            default: assert(0);
+          }
+          A = CopyDebug(new ICmpInst(I, StrictPred, LeftChunks[1], RightChunks[1]), I);
+          B = CopyDebug(new ICmpInst(I, ICmpInst::ICMP_EQ, LeftChunks[1], RightChunks[1]), I);
+          C = CopyDebug(new ICmpInst(I, UnsignedPred, LeftChunks[0], RightChunks[0]), I);
+          D = CopyDebug(BinaryOperator::Create(Instruction::And, B, C, "", I), I);
+          Final = CopyDebug(BinaryOperator::Create(Instruction::Or, A, D, "", I), I);
+          I->replaceAllUsesWith(Final);
+          break;
+        }
+        default: assert(0);
+      }
+      break;
+    }
+    case Instruction::Select: {
+      SelectInst *SI = cast<SelectInst>(I);
+      Value *Cond = SI->getCondition();
+      ChunksVec TrueChunks = getChunks(SI->getTrueValue());
+      ChunksVec FalseChunks = getChunks(SI->getFalseValue());
+      unsigned Num = getNumChunks(I->getType());
+      for (unsigned i = 0; i < Num; i++) {
+        Instruction *Part = CopyDebug(SelectInst::Create(Cond, TrueChunks[i], FalseChunks[i], "", I), I);
+        Chunks.push_back(Part);
+      }
+      break;
+    }
+    case Instruction::PHI: {
+      PHINode *Parent = cast<PHINode>(I);
+      int Num = getNumChunks(I->getType());
+      int PhiNum = Parent->getNumIncomingValues();
+      for (int i = 0; i < Num; i++) {
+        Instruction *P = CopyDebug(PHINode::Create(i32, PhiNum, "", I), I);
+        Chunks.push_back(P);
+      }
+      // PHI node operands may not be translated yet; we'll handle them at the end.
+      Phis.push_back(Parent);
+      break;
+    }
+    case Instruction::And:
+    case Instruction::Or:
+    case Instruction::Xor: {
+      BinaryOperator *BO = cast<BinaryOperator>(I);
+      ChunksVec LeftChunks = getChunks(BO->getOperand(0));
+      ChunksVec RightChunks = getChunks(BO->getOperand(1));
+      int Num = getNumChunks(BO->getType());
+      for (int i = 0; i < Num; i++) {
+        // If there's a constant operand, it's likely enough that one of the
+        // chunks will be a trivial operation, so it's worth calling
+        // SimplifyBinOp here.
+        if (Value *V = SimplifyBinOp(BO->getOpcode(), LeftChunks[i], RightChunks[i], DL)) {
+          Chunks.push_back(V);
+        } else {
+          Chunks.push_back(CopyDebug(BinaryOperator::Create(BO->getOpcode(), LeftChunks[i], RightChunks[i], "", BO), BO));
+        }
+      }
+      break;
+    }
+    case Instruction::Call: {
+      CallInst *CI = cast<CallInst>(I);
+      Function *F = CI->getCalledFunction();
+      if (F) {
+        assert(okToRemainIllegal(F));
+        return false;
+      }
+      Value *CV = CI->getCalledValue();
+      FunctionType *OFT = NULL;
+      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+        assert(CE);
+        assert(CE->getOpcode() == Instruction::BitCast);
+        OFT = cast<FunctionType>(cast<PointerType>(CE->getType())->getElementType());
+        Constant *C = CE->getOperand(0);
+        CV = ConstantExpr::getBitCast(C, getLegalizedFunctionType(OFT)->getPointerTo());
+      } else {
+        // this is a function pointer call
+        OFT = cast<FunctionType>(cast<PointerType>(CV->getType())->getElementType());
+        // we need to add a bitcast
+        CV = new BitCastInst(CV, getLegalizedFunctionType(OFT)->getPointerTo(), "", I);
+      }
+      // create a call with space for legal args
+      SmallVector<Value *, 0> Args; // XXX
+      int Num = OFT->getNumParams();
+      for (int i = 0; i < Num; i++) {
+        Type *T = OFT->getParamType(i);
+        if (!isIllegal(T)) {
+          Args.push_back(CI->getArgOperand(i));
+        } else {
+          assert(T == i64);
+          ChunksVec ArgChunks = getChunks(CI->getArgOperand(i));
+          Args.push_back(ArgChunks[0]);
+          Args.push_back(ArgChunks[1]);
+        }
+      }
+      Instruction *L = CopyDebug(CallInst::Create(CV, Args, "", I), I);
+      Instruction *H = NULL;
+      // legalize return value as well, if necessary
+      if (isIllegal(I->getType())) {
+        assert(I->getType() == i64);
+        ensureFuncs();
+        H = CopyDebug(CallInst::Create(GetHigh, "", I), I);
+        Chunks.push_back(L);
+        Chunks.push_back(H);
+      } else {
+        I->replaceAllUsesWith(L);
+      }
+      break;
+    }
+    case Instruction::FPToUI:
+    case Instruction::FPToSI: {
+      assert(I->getType() == i64);
+      ensureFuncs();
+      SmallVector<Value *, 1> Args;
+      Value *Input = I->getOperand(0);
+      Args.push_back(Input);
+      Instruction *L, *H;
+      if (Input->getType()->isFloatTy()) {
+        L = CopyDebug(CallInst::Create(FtoILow, Args, "", I), I);
+        H = CopyDebug(CallInst::Create(FtoIHigh, Args, "", I), I);
+      } else {
+        L = CopyDebug(CallInst::Create(DtoILow, Args, "", I), I);
+        H = CopyDebug(CallInst::Create(DtoIHigh, Args, "", I), I);
+      }
+      Chunks.push_back(L);
+      Chunks.push_back(H);
+      break;
+    }
+    case Instruction::BitCast: {
+      if (I->getType() == Type::getDoubleTy(TheModule->getContext())) {
+        // fall through to itofp
+      } else if (I->getOperand(0)->getType() == Type::getDoubleTy(TheModule->getContext())) {
+        // double to i64
+        assert(I->getType() == i64);
+        ensureFuncs();
+        SmallVector<Value *, 1> Args;
+        Args.push_back(I->getOperand(0));
+        Instruction *L = CopyDebug(CallInst::Create(BDtoILow, Args, "", I), I);
+        Instruction *H = CopyDebug(CallInst::Create(BDtoIHigh, Args, "", I), I);
+        Chunks.push_back(L);
+        Chunks.push_back(H);
+        break;
+      } else {
+        // no-op bitcast
+        assert(I->getType() == I->getOperand(0)->getType());
+        Chunks = getChunks(I->getOperand(0));
+        break;
+      }
+    }
+    case Instruction::SIToFP:
+    case Instruction::UIToFP: {
+      assert(I->getOperand(0)->getType() == i64);
+      ensureFuncs();
+      ChunksVec InputChunks = getChunks(I->getOperand(0));
+      Function *F;
+      switch (I->getOpcode()) {
+        case Instruction::SIToFP: F = I->getType() == Type::getDoubleTy(TheModule->getContext()) ? SItoD : SItoF; break;
+        case Instruction::UIToFP: F = I->getType() == Type::getDoubleTy(TheModule->getContext()) ? UItoD : UItoF; break;
+        case Instruction::BitCast: {
+          assert(I->getType() == Type::getDoubleTy(TheModule->getContext()));
+          F = BItoD;
+          break;
+        }
+        default: assert(0);
+      }
+      Instruction *D = CopyDebug(CallInst::Create(F, InputChunks, "", I), I);
+      I->replaceAllUsesWith(D);
+      break;
+    }
+    case Instruction::Switch: {
+      assert(I->getOperand(0)->getType() == i64);
+      ChunksVec InputChunks = getChunks(I->getOperand(0));
+
+      // do a switch on the lower 32 bits, into a different basic block for each target, then do a branch in each of those on the high 32 bits
+      SwitchInst* SI = cast<SwitchInst>(I);
+      BasicBlock *DD = SI->getDefaultDest();
+      BasicBlock *SwitchBB = I->getParent();
+      Function *F = SwitchBB->getParent();
+
+      unsigned NumItems = 0;
+      for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) {
+        NumItems += i.getCaseValueEx().getNumItems();
+      }
+      SwitchInst *LowSI = SwitchInst::Create(InputChunks[0], DD, NumItems, I); // same default destination: if lower bits do not match, go straight to default
+      CopyDebug(LowSI, I);
+
+      typedef std::pair<uint32_t, BasicBlock*> Pair;
+      typedef std::vector<Pair> Vec; // vector of pairs of high 32 bits, basic block
+      typedef std::map<uint32_t, Vec> Map; // maps low 32 bits to their Vec info
+      Map Groups;                          // (as two 64-bit values in the switch may share their lower bits)
+
+      for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) {
+        BasicBlock *BB = i.getCaseSuccessor();
+        const IntegersSubset CaseVal = i.getCaseValueEx();
+        assert(CaseVal.isSingleNumbersOnly());
+        for (unsigned Index = 0; Index < CaseVal.getNumItems(); Index++) {
+          uint64_t Bits = CaseVal.getSingleNumber(Index).toConstantInt()->getZExtValue();
+          uint32_t LowBits = (uint32_t)Bits;
+          uint32_t HighBits = (uint32_t)(Bits >> 32);
+          Vec& V = Groups[LowBits];
+          V.push_back(Pair(HighBits, BB));
+        }
+      }
+
+      unsigned Counter = 0;
+      BasicBlock *InsertPoint = SwitchBB;
+
+      for (Map::iterator GI = Groups.begin(); GI != Groups.end(); GI++) {
+        uint32_t LowBits = GI->first;
+        Vec &V = GI->second;
+
+        BasicBlock *NewBB = BasicBlock::Create(F->getContext(), "switch64_" + utostr(Counter++), F);
+        NewBB->moveAfter(InsertPoint);
+        InsertPoint = NewBB;
+        LowSI->addCase(cast<ConstantInt>(ConstantInt::get(i32, LowBits)), NewBB);
+
+        /*if (V.size() == 1) {
+          // just one option, create a branch
+          Instruction *CheckHigh = CopyDebug(new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, InputChunks[1], ConstantInt::get(i32, V[0]->first)), I);
+          Split.ToFix.push_back(CheckHigh);
+          CopyDebug(BranchInst::Create(V[0]->second, DD, CheckHigh, NewBB), I);
+        } else {*/
+
+        // multiple options, create a switch - we could also optimize and make an icmp/branch if just one, as in commented code above
+        SwitchInst *HighSI = SwitchInst::Create(InputChunks[1], DD, V.size(), NewBB); // same default destination: if lower bits do not match, go straight to default
+        for (unsigned i = 0; i < V.size(); i++) {
+          BasicBlock *BB = V[i].second;
+          HighSI->addCase(cast<ConstantInt>(ConstantInt::get(i32, V[i].first)), BB);
+          // fix phis, we used to go SwitchBB->BB, but now go SwitchBB->NewBB->BB, so we look like we arrived from NewBB. Replace the phi from the
+          // now unneeded SwitchBB to the new BB
+          for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+            PHINode *Phi = dyn_cast<PHINode>(I);
+            if (!Phi) break;
+            Phi->setIncomingBlock(Phi->getBasicBlockIndex(SwitchBB), NewBB);
+          }
+        }
+
+        // We used to go SwitchBB->DD, but now go SwitchBB->NewBB->DD, fix that like with BB above. However here we do not replace,
+        // as the switch BB is still possible to arrive from - we can arrive at the default if either the lower bits were wrong (we
+        // arrive from the switchBB) or from the NewBB if the high bits were wrong.
+        for (BasicBlock::iterator I = DD->begin(); I != DD->end(); ++I) {
+          PHINode *Phi = dyn_cast<PHINode>(I);
+          if (!Phi) break;
+          Phi->addIncoming(Phi->getIncomingValue(Phi->getBasicBlockIndex(SwitchBB)), NewBB);
+          // XXX note that we add a new i64 thing here. now the phi was already an i64 operation, and is being legalized anyhow, but we only notice the original inputs!
+          //     we seem to be safe for now due to order of operation (phis show up after switches, but FIXME
+        }
+      }
+      break;
+    }
+    default: {
+      I->dump();
+      assert(0 && "some i64 thing we can't legalize yet");
+    }
+  }
+
+  return true;
+}
+
+ChunksVec ExpandI64::getChunks(Value *V) {
+  assert(isIllegal(V->getType()));
+
+  unsigned Num = getNumChunks(V->getType());
+  Type *i32 = Type::getInt32Ty(V->getContext());
+
+  if (isa<UndefValue>(V))
+    return ChunksVec(Num, UndefValue::get(i32));
+
+  if (Constant *C = dyn_cast<Constant>(V)) {
+    ChunksVec Chunks;
+    for (unsigned i = 0; i < Num; i++) {
+      Constant *Count = ConstantInt::get(C->getType(), i * 32);
+      Constant *NewC = ConstantExpr::getTrunc(ConstantExpr::getLShr(C, Count), i32);
+      TargetLibraryInfo *TLI = 0; // TODO
+      if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) {
+        if (Constant *FoldedC = ConstantFoldConstantExpression(NewCE, DL, TLI)) {
+          NewC = FoldedC;
+        }
+      }
+
+      Chunks.push_back(NewC);
+    }
+    return Chunks;
+  }
+
+  assert(Splits.find(V) != Splits.end());
+  assert(Splits[V].size() == Num);
+  return Splits[V];
+}
+
+void ExpandI64::ensureFuncs() {
+  if (Add != NULL) return;