diff options
author | Dan Gohman <sunfish@google.com> | 2014-02-12 18:09:52 -0800 |
---|---|---|
committer | Dan Gohman <sunfish@google.com> | 2014-02-12 22:30:21 -0800 |
commit | 718e02f4cea34a6f34d7a6f1f82411960bff1ba4 (patch) | |
tree | a7c10dbce5f13c4badecacd1f6de052606b354ae | |
parent | 2b36da6f5426d0ea67c85fdee8016a5bbc828a23 (diff) |
Revamp ExpandI64.
By using a reverse-postorder traversal of the basic blocks, we can perform
this transform in a single pass. This eliminates the need for tricky
coordination between the passes.
-rw-r--r-- | lib/Target/JSBackend/JSBackend.cpp | 1 | ||||
-rw-r--r-- | lib/Transforms/NaCl/ExpandI64.cpp | 973 | ||||
-rw-r--r-- | test/Transforms/NaCl/expand-i64.ll | 272 |
3 files changed, 686 insertions, 560 deletions
diff --git a/lib/Target/JSBackend/JSBackend.cpp b/lib/Target/JSBackend/JSBackend.cpp index 98f2c17de0..174e6b1141 100644 --- a/lib/Target/JSBackend/JSBackend.cpp +++ b/lib/Target/JSBackend/JSBackend.cpp @@ -302,6 +302,7 @@ namespace { } } std::string getPtrAsStr(const Value* Ptr) { + Ptr = Ptr->stripPointerCasts(); if (isa<const ConstantPointerNull>(Ptr)) return "0"; if (const Function *F = dyn_cast<Function>(Ptr)) { return utostr(getFunctionIndex(F)); diff --git a/lib/Transforms/NaCl/ExpandI64.cpp b/lib/Transforms/NaCl/ExpandI64.cpp index b2e33e6d68..8f0efb2478 100644 --- a/lib/Transforms/NaCl/ExpandI64.cpp +++ b/lib/Transforms/NaCl/ExpandI64.cpp @@ -1,4 +1,4 @@ -//===- ExpandI64.cpp - Expand out variable argument function calls-----===// +//===- ExpandI64.cpp - Expand i64 and wider integer types -------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===------------------------------------------------------------------===// // -// This pass expands and lowers all operations on integers larger than i64 +// This pass expands and lowers all operations on integers i64 and wider // into 32-bit operations that can be handled by JS in a natural way. // // 64-bit variables become pairs of 2 32-bit variables, for the low and @@ -24,8 +24,11 @@ // //===------------------------------------------------------------------===// +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -33,49 +36,36 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/CFG.h" #include "llvm/Transforms/NaCl.h" #include <map> #include "llvm/Support/raw_ostream.h" -#include <stdio.h> -#define dump(x) fprintf(stderr, x "\n") -#define dumpv(x, ...) fprintf(stderr, x "\n", __VA_ARGS__) -#define dumpfail(x) { fprintf(stderr, x "\n"); fprintf(stderr, "%s : %d\n", __FILE__, __LINE__); report_fatal_error("fail"); } -#define dumpfailv(x, ...) { fprintf(stderr, x "\n", __VA_ARGS__); fprintf(stderr, "%s : %d\n", __FILE__, __LINE__); report_fatal_error("fail"); } -#define dumpIR(value) { \ - std::string temp; \ - raw_string_ostream stream(temp); \ - stream << *(value); \ - fprintf(stderr, "%s\n", temp.c_str()); \ -} + +#ifdef NDEBUG #undef assert -#define assert(x) { if (!(x)) dumpfail(#x); } +#define assert(x) { if (!(x)) report_fatal_error(#x); } +#endif using namespace llvm; namespace { typedef SmallVector<Value*, 2> ChunksVec; + typedef std::map<Value*, ChunksVec> SplitsMap; - typedef std::vector<Instruction*> SplitInstrs; - - // The tricky part in all this pass is that we legalize many instructions that interdepend on each - // other. So we do one pass where we create the new legal instructions but leave the illegal ones - // in place, then a second where we hook up the legal ones to the other legal ones, and only - // then do we remove the illegal ones. - struct SplitInfo { - SplitInstrs ToFix; // new instrs, which we fix up later with proper legalized input (if they received illegal input) - ChunksVec Chunks; // 32-bit chunks of the legalized output, if the output was illegal - }; - - typedef std::map<Instruction*, SplitInfo> SplitsMap; - typedef std::map<Value*, ChunksVec> ArgsMap; + typedef SmallVector<PHINode *, 8> PHIVec; + typedef SmallVector<Instruction *, 8> DeadVec; // This is a ModulePass because the pass recreates functions in // order to expand i64 arguments to pairs of i32s. class ExpandI64 : public ModulePass { + bool Changed; + DataLayout *DL; + Module *TheModule; + SplitsMap Splits; // old illegal value to new insts - ArgsMap SplitArgs; // old illegal function arguments, to split parts + PHIVec Phis; // If the function has an illegal return or argument, create a legal version void ensureLegalFunc(Function *F); @@ -86,7 +76,7 @@ namespace { // splits an illegal instruction into 32-bit chunks. We do // not yet have the values yet, as they depend on other // splits, so store the parts in Splits, for FinalizeInst. - void splitInst(Instruction *I, DataLayout& DL); + bool splitInst(Instruction *I); // For an illegal value, returns the split out chunks // representing the low and high parts, that splitInst @@ -96,13 +86,10 @@ namespace { // map to the proper legalized new arguments ChunksVec getChunks(Value *V); - void finalizeInst(Instruction *I); - Function *Add, *Sub, *Mul, *SDiv, *UDiv, *SRem, *URem, *LShr, *AShr, *Shl, *GetHigh, *SetHigh, *FtoILow, *FtoIHigh, *DtoILow, *DtoIHigh, *SItoF, *UItoF, *SItoD, *UItoD, *BItoD, *BDtoILow, *BDtoIHigh; void ensureFuncs(); - - Module *TheModule; + unsigned getNumChunks(Type *T); public: static char ID; @@ -113,6 +100,7 @@ namespace { } virtual bool runOnModule(Module &M); + virtual void getAnalysisUsage(AnalysisUsage &AU) const; }; } @@ -152,73 +140,133 @@ static FunctionType *getLegalizedFunctionType(FunctionType *FT) { // Implementation of ExpandI64 -bool okToRemainIllegal(Function *F) { - const char *Name = F->getName().str().c_str(); - if (strcmp(Name, "llvm.dbg.value") == 0) return true; +static bool okToRemainIllegal(Function *F) { + StringRef Name = F->getName(); + if (Name == "llvm.dbg.value") return true; + + // XXX EMSCRIPTEN: These take an i64 immediate argument; since they're not + // real instructions, we don't need to legalize them. + if (Name == "llvm.lifetime.start") return true; + if (Name == "llvm.lifetime.end") return true; + if (Name == "llvm.invariant.start") return true; + if (Name == "llvm.invariant.end") return true; + return false; } -int getNumChunks(Type *T) { - IntegerType *IT = cast<IntegerType>(T); - int Num = IT->getBitWidth(); - if (Num%32 != 0) assert(0); - return Num/32; +unsigned ExpandI64::getNumChunks(Type *T) { + unsigned Num = DL->getTypeSizeInBits(T); + return (Num + 31) / 32; +} + +static bool isLegalFunctionType(FunctionType *FT) { + if (isIllegal(FT->getReturnType())) { + return false; + } + + int Num = FT->getNumParams(); + for (int i = 0; i < Num; i++) { + if (isIllegal(FT->getParamType(i))) { + return false; + } + } + + return true; +} + +static bool isLegalInstruction(const Instruction *I) { + if (isIllegal(I->getType())) { + return false; + } + + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + if (isIllegal(I->getOperand(i)->getType())) { + return false; + } + } + + return true; +} + +// We can't use RecreateFunction because we need to handle +// function and argument attributes specially. +static Function *RecreateFunctionLegalized(Function *F, FunctionType *NewType) { + Function *NewFunc = Function::Create(NewType, F->getLinkage()); + + AttributeSet Attrs = F->getAttributes(); + AttributeSet FnAttrs = Attrs.getFnAttributes(); + + // Legalizing the return value is done by storing part of the value into + // static storage. Subsequent analysis will see this as a memory access, + // so we can no longer claim to be readonly or readnone. + if (isIllegal(F->getReturnType())) { + FnAttrs = FnAttrs.removeAttribute(F->getContext(), + AttributeSet::FunctionIndex, + Attribute::ReadOnly); + FnAttrs = FnAttrs.removeAttribute(F->getContext(), + AttributeSet::FunctionIndex, + Attribute::ReadNone); + } + + NewFunc->addAttributes(AttributeSet::FunctionIndex, FnAttrs); + NewFunc->addAttributes(AttributeSet::ReturnIndex, Attrs.getRetAttributes()); + Function::arg_iterator AI = F->arg_begin(); + for (unsigned i = 0, e = F->arg_size(), j = 0; i != e; ++i, ++j, ++AI) { + AttributeSet ArgAttrs = Attrs.getParamAttributes(i); + if (isIllegal(AI->getType())) { + ++j; + } else { + NewFunc->addAttributes(j + 1, ArgAttrs); + } + } + + F->getParent()->getFunctionList().insert(F, NewFunc); + NewFunc->takeName(F); + NewFunc->getBasicBlockList().splice(NewFunc->begin(), + F->getBasicBlockList()); + F->replaceAllUsesWith( + ConstantExpr::getBitCast(NewFunc, + F->getFunctionType()->getPointerTo())); + return NewFunc; } void ExpandI64::ensureLegalFunc(Function *F) { if (okToRemainIllegal(F)) return; FunctionType *FT = F->getFunctionType(); - int Num = FT->getNumParams(); + if (isLegalFunctionType(FT)) return; + + Changed = true; + Function *NF = RecreateFunctionLegalized(F, getLegalizedFunctionType(FT)); + std::string Name = NF->getName(); + if (strncmp(Name.c_str(), "llvm.", 5) == 0) { + // this is an intrinsic, and we are changing its signature, which will annoy LLVM, so rename + const size_t len = Name.size(); + SmallString<256> NewName; + NewName.resize(len); + for (unsigned i = 0; i < len; i++) { + NewName[i] = Name[i] != '.' ? Name[i] : '_'; + } + NF->setName(Twine(NewName)); + } - // Allocate small names on stack, large ones on heap. - // This is because on VS2010, arrays on the stack must - // be compile-time constant sized. (no C99 dynamic-length array support) - const size_t StackSize = 256; - char StackArray[StackSize]; - char *AllocArray = 0; - - for (int i = -1; i < Num; i++) { - Type *T = i == -1 ? FT->getReturnType() : FT->getParamType(i); - if (isIllegal(T)) { - Function *NF = RecreateFunction(F, getLegalizedFunctionType(FT)); - std::string Name = NF->getName(); - if (strncmp(Name.c_str(), "llvm.", 5) == 0) { - // this is an intrinsic, and we are changing its signature, which will annoy LLVM, so rename - const size_t len = Name.size()+1; - char *NewName; - if (len > StackSize) - NewName = AllocArray = new char[len]; - else - NewName = StackArray; - const char *CName = Name.c_str(); - for (unsigned i = 0; i < len; i++) { - NewName[i] = CName[i] != '.' ? CName[i] : '_'; - } - NF->setName(NewName); - delete[] AllocArray; - AllocArray = 0; - } - // Move and update arguments - for (Function::arg_iterator Arg = F->arg_begin(), E = F->arg_end(), NewArg = NF->arg_begin(); - Arg != E; ++Arg) { - if (Arg->getType() == NewArg->getType()) { - NewArg->takeName(Arg); - Arg->replaceAllUsesWith(NewArg); - NewArg++; - } else { - // This was legalized - ChunksVec &Chunks = SplitArgs[&*Arg]; - int Num = getNumChunks(Arg->getType()); - assert(Num == 2); - for (int i = 0; i < Num; i++) { - Chunks.push_back(&*NewArg); - if (NewArg->hasName()) Chunks[i]->setName(NewArg->getName() + "$" + utostr(i)); - NewArg++; - } - } + // Move and update arguments + for (Function::arg_iterator Arg = F->arg_begin(), E = F->arg_end(), NewArg = NF->arg_begin(); + Arg != E; ++Arg) { + if (Arg->getType() == NewArg->getType()) { + NewArg->takeName(Arg); + Arg->replaceAllUsesWith(NewArg); + NewArg++; + } else { + // This was legalized + ChunksVec &Chunks = Splits[&*Arg]; + int Num = getNumChunks(Arg->getType()); + assert(Num == 2); + for (int i = 0; i < Num; i++) { + Chunks.push_back(&*NewArg); + if (NewArg->hasName()) Chunks[i]->setName(NewArg->getName() + "$" + utostr(i)); + NewArg++; } - break; } } } @@ -227,76 +275,113 @@ void ExpandI64::removeIllegalFunc(Function *F) { if (okToRemainIllegal(F)) return; FunctionType *FT = F->getFunctionType(); - int Num = FT->getNumParams(); - for (int i = -1; i < Num; i++) { - Type *T = i == -1 ? FT->getReturnType() : FT->getParamType(i); - if (isIllegal(T)) { - F->eraseFromParent(); - break; - } + if (!isLegalFunctionType(FT)) { + F->eraseFromParent(); } } -void ExpandI64::splitInst(Instruction *I, DataLayout& DL) { +bool ExpandI64::splitInst(Instruction *I) { Type *i32 = Type::getInt32Ty(I->getContext()); Type *i32P = i32->getPointerTo(); Type *i64 = Type::getInt64Ty(I->getContext()); Value *Zero = Constant::getNullValue(i32); - Value *Ones = Constant::getAllOnesValue(i32); - SplitInfo &Split = Splits[I]; + ChunksVec &Chunks = Splits[I]; switch (I->getOpcode()) { case Instruction::SExt: { - Value *Input = I->getOperand(0); - Type *T = Input->getType(); - Value *Low; - if (T->getIntegerBitWidth() < 32) { - Low = CopyDebug(new SExtInst(Input, i32, "", I), I); + ChunksVec InputChunks; + Value *Op = I->getOperand(0); + if (isIllegal(Op->getType())) { + InputChunks = getChunks(Op); } else { - assert(T->getIntegerBitWidth() == 32); - Low = CopyDebug(BinaryOperator::Create(Instruction::Or, Input, Zero, "", I), I); // copy the input, hackishly XXX + InputChunks.push_back(Op); + } + + for (unsigned i = 0, e = InputChunks.size(); i != e; ++i) { + Value *Input = InputChunks[i]; + + Type *T = Input->getType(); + Value *Chunk; + if (T->getIntegerBitWidth() < 32) { + Chunk = CopyDebug(new SExtInst(Input, i32, "", I), I); + } else { + assert(T->getIntegerBitWidth() == 32); + Chunk = Input; + } + Chunks.push_back(Chunk); } - Split.Chunks.push_back(Low); - Instruction *Check = CopyDebug(new ICmpInst(I, ICmpInst::ICMP_SLT, Low, Zero), I); + + Instruction *Check = CopyDebug(new ICmpInst(I, ICmpInst::ICMP_SLT, Chunks.back(), Zero), I); int Num = getNumChunks(I->getType()); - for (int i = 1; i < Num; i++) { - Instruction *High = CopyDebug(SelectInst::Create(Check, Ones, Zero, "", I), I); - Split.Chunks.push_back(High); + for (int i = Chunks.size(); i < Num; i++) { + Instruction *High = CopyDebug(new SExtInst(Check, i32, "", I), I); + Chunks.push_back(High); } break; } + case Instruction::PtrToInt: case Instruction::ZExt: { - Value *Input = I->getOperand(0); - unsigned InputBits = Input->getType()->getIntegerBitWidth(); - if (InputBits < 32) { - I->setOperand(0, CopyDebug(new ZExtInst(Input, i32, "", I), I)); // add a zext to 32-bit size + Value *Op = I->getOperand(0); + ChunksVec InputChunks; + if (I->getOpcode() == Instruction::PtrToInt) { + InputChunks.push_back(CopyDebug(new PtrToIntInst(Op, i32, "", I), I)); + } else if (isIllegal(Op->getType())) { + InputChunks = getChunks(Op); } else { - if (InputBits % 32 != 0) assert(0); + InputChunks.push_back(Op); + } + + for (unsigned i = 0, e = InputChunks.size(); i != e; ++i) { + Value *Input = InputChunks[i]; + Type *T = Input->getType(); + + Value *Chunk; + if (T->getIntegerBitWidth() < 32) { + Chunk = CopyDebug(new ZExtInst(Input, i32, "", I), I); + } else { + assert(T->getIntegerBitWidth() == 32); + Chunk = Input; + } + Chunks.push_back(Chunk); } + int Num = getNumChunks(I->getType()); - for (int i = 0; i < Num; i++) { - Split.Chunks.push_back(BinaryOperator::Create(Instruction::Or, Zero, Zero, "", I)); // copy the input, hackishly XXX + for (int i = Chunks.size(); i < Num; i++) { + Chunks.push_back(Zero); } break; } + case Instruction::IntToPtr: case Instruction::Trunc: { - unsigned Bits = I->getType()->getIntegerBitWidth(); - if (Bits < 32) { - // we need to add a trunc of the low 32 bits - Instruction *L = CopyDebug(new TruncInst(Zero, I->getType(), "", I), I); - Split.ToFix.push_back(L); - } else if (Bits > 32) { - if (Bits % 32 != 0) assert(0); - int Num = getNumChunks(I->getType()); - for (int i = 0; i < Num; i++) { - Split.Chunks.push_back(BinaryOperator::Create(Instruction::Or, Zero, Zero, "", I)); // copy the input, hackishly XXX + unsigned Num = getNumChunks(I->getType()); + unsigned NumBits = DL->getTypeSizeInBits(I->getType()); + ChunksVec InputChunks = getChunks(I->getOperand(0)); + for (unsigned i = 0; i < Num; i++) { + Value *Input = InputChunks[i]; + + Value *Chunk; + if (NumBits < 32) { + Chunk = CopyDebug(new TruncInst(Input, IntegerType::get(I->getContext(), NumBits), "", I), I); + NumBits = 0; + } else { + Chunk = Input; + NumBits -= 32; + } + if (I->getOpcode() == Instruction::IntToPtr) { + assert(i == 0); + Chunk = CopyDebug(new IntToPtrInst(Chunk, I->getType(), "", I), I); } + Chunks.push_back(Chunk); + } + if (!isIllegal(I->getType())) { + assert(Chunks.size() == 1); + I->replaceAllUsesWith(Chunks[0]); } break; } case Instruction::Load: { - LoadInst *LI = dyn_cast<LoadInst>(I); + LoadInst *LI = cast<LoadInst>(I); Instruction *AI = CopyDebug(new PtrToIntInst(LI->getPointerOperand(), i32, "", I), I); int Num = getNumChunks(I->getType()); for (int i = 0; i < Num; i++) { @@ -304,32 +389,38 @@ void ExpandI64::splitInst(Instruction *I, DataLayout& DL) { Instruction *Ptr = CopyDebug(new IntToPtrInst(Add, i32P, "", I), I); LoadInst *Chunk = new LoadInst(Ptr, "", I); CopyDebug(Chunk, I); Chunk->setAlignment(std::min(4U, LI->getAlignment())); - Split.Chunks.push_back(Chunk); + Chunk->setVolatile(LI->isVolatile()); + Chunk->setOrdering(LI->getOrdering()); + Chunk->setSynchScope(LI->getSynchScope()); + Chunks.push_back(Chunk); } break; } case Instruction::Store: { - StoreInst *SI = dyn_cast<StoreInst>(I); + StoreInst *SI = cast<StoreInst>(I); Instruction *AI = CopyDebug(new PtrToIntInst(SI->getPointerOperand(), i32, "", I), I); - int Num = getNumChunks(SI->getValueOperand()->getType()); + ChunksVec InputChunks = getChunks(SI->getValueOperand()); + int Num = InputChunks.size(); for (int i = 0; i < Num; i++) { Instruction *Add = i == 0 ? AI : CopyDebug(BinaryOperator::Create(Instruction::Add, AI, ConstantInt::get(i32, 4*i), "", I), I); Instruction *Ptr = CopyDebug(new IntToPtrInst(Add, i32P, "", I), I); - StoreInst *Chunk = new StoreInst(Zero, Ptr, "", I); CopyDebug(Chunk, I); + StoreInst *Chunk = new StoreInst(InputChunks[i], Ptr, I); Chunk->setAlignment(std::min(4U, SI->getAlignment())); - Split.ToFix.push_back(Chunk); + Chunk->setVolatile(SI->isVolatile()); + Chunk->setOrdering(SI->getOrdering()); + Chunk->setSynchScope(SI->getSynchScope()); + CopyDebug(Chunk, I); } break; } case Instruction::Ret: { assert(I->getOperand(0)->getType() == i64); + ChunksVec InputChunks = getChunks(I->getOperand(0)); ensureFuncs(); SmallVector<Value *, 1> Args; - Args.push_back(Zero); // will be fixed - Instruction *High = CopyDebug(CallInst::Create(SetHigh, Args, "", I), I); - Instruction *Low = CopyDebug(ReturnInst::Create(I->getContext(), Zero, I), I); // will be fixed - Split.ToFix.push_back(Low); - Split.ToFix.push_back(High); + Args.push_back(InputChunks[1]); + CopyDebug(CallInst::Create(SetHigh, Args, "", I), I); + CopyDebug(ReturnInst::Create(I->getContext(), InputChunks[0], I), I); break; } case Instruction::Add: @@ -342,7 +433,9 @@ void ExpandI64::splitInst(Instruction *I, DataLayout& DL) { case Instruction::LShr: case Instruction::AShr: case Instruction::Shl: { - int Num = getNumChunks(I->getOperand(0)->getType()); + ChunksVec LeftChunks = getChunks(I->getOperand(0)); + ChunksVec RightChunks = getChunks(I->getOperand(1)); + unsigned Num = getNumChunks(I->getType()); if (Num == 2) { ensureFuncs(); Value *Low = NULL, *High = NULL; @@ -355,11 +448,12 @@ void ExpandI64::splitInst(Instruction *I, DataLayout& DL) { case Instruction::UDiv: F = UDiv; break; case Instruction::SRem: F = SRem; break; case Instruction::URem: F = URem; break; + case Instruction::AShr: F = AShr; break; case Instruction::LShr: { if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { unsigned Shifts = CI->getZExtValue(); if (Shifts == 32) { - Low = CopyDebug(BinaryOperator::Create(Instruction::Or, Zero, Zero, "", I), I); // copy hackishly XXX TODO: eliminate x|0 to x in post-pass + Low = LeftChunks[1]; High = Zero; break; } @@ -367,13 +461,12 @@ void ExpandI64::splitInst(Instruction *I, DataLayout& DL) { F = LShr; break; } - case Instruction::AShr: F = AShr; break; case Instruction::Shl: { if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { - unsigned Shifts = CI->getZExtValue(); + const APInt &Shifts = CI->getValue(); if (Shifts == 32) { Low = Zero; - High = CopyDebug(BinaryOperator::Create(Instruction::Or, Zero, Zero, "", I), I); // copy hackishly XXX TODO: eliminate x|0 to x in post-pass + High = LeftChunks[0]; break; } } @@ -385,67 +478,83 @@ void ExpandI64::splitInst(Instruction *I, DataLayout& DL) { if (F) { // use a library call, no special optimization was found SmallVector<Value *, 4> Args; - for (unsigned i = 0; i < 4; i++) Args.push_back(Zero); // will be fixed + Args.push_back(LeftChunks[0]); + Args.push_back(LeftChunks[1]); + Args.push_back(RightChunks[0]); + Args.push_back(RightChunks[1]); Low = CopyDebug(CallInst::Create(F, Args, "", I), I); High = CopyDebug(CallInst::Create(GetHigh, "", I), I); } - Split.Chunks.push_back(Low); - Split.Chunks.push_back(High); + Chunks.push_back(Low); + Chunks.push_back(High); } else { - // more than 32 bits. handle simple shifts for lshr and shl + // more than 64 bits. handle simple shifts for lshr and shl + assert(I->getOpcode() == Instruction::LShr || I->getOpcode() == Instruction::Shl); ConstantInt *CI = cast<ConstantInt>(I->getOperand(1)); - int Shifts = CI->getSExtValue(); - if (Shifts % 32 == 0) { - for (int i = 0; i < Num; i++) { - Split.Chunks.push_back(BinaryOperator::Create(Instruction::Or, Zero, Zero, "", I)); // copy hackishly XXX TODO: eliminate x|0 to x in post-pass - } + unsigned Shifts = CI->getZExtValue(); + Constant *Frac = ConstantInt::get(i32, Shifts % 32); + Constant *Comp = ConstantInt::get(i32, 32 - (Shifts % 32)); + Instruction::BinaryOps Opcode, Reverse; + unsigned ShiftChunks, Dir; + if (I->getOpcode() == Instruction::Shl) { + Opcode = Instruction::Shl; + Reverse = Instruction::LShr; + ShiftChunks = -(Shifts/32); + Dir = -1; + } else if (I->getOpcode() == Instruction::LShr) { + Opcode = Instruction::LShr; + Reverse = Instruction::Shl; + ShiftChunks = Shifts/32; + Dir = 1; } else { - // not a simple multiple of 32. - Constant *Frac = ConstantInt::get(i32, Shifts % 32); - Constant *Comp = ConstantInt::get(i32, 32 - (Shifts % 32)); - Instruction::BinaryOps Opcode, Reverse; - int ShiftChunks, Dir; - if (I->getOpcode() == Instruction::Shl) { - Opcode = Instruction::Shl; - Reverse = Instruction::LShr; - ShiftChunks = -Shifts/32; - Dir = -1; - } else if (I->getOpcode() == Instruction::LShr) { - Opcode = Instruction::LShr; - Reverse = Instruction::Shl; - ShiftChunks = Shifts/32; - Dir = 1; + errs() << *I << "\n"; + assert(0); + } + for (unsigned i = 0; i < Num; i++) { + Value *L; + if (i + ShiftChunks < LeftChunks.size()) { + L = LeftChunks[i + ShiftChunks]; } else { - errs() << *I << "\n"; - assert(0); + L = Zero; } - for (int i = 0; i < Num; i++) { - Split.ToFix.push_back(BinaryOperator::Create(Opcode, Zero, Frac, "", I)); // shifted the fractional amount + + Value *H; + if (i + ShiftChunks + Dir < LeftChunks.size()) { + H = LeftChunks[i + ShiftChunks + Dir]; + } else { + H = Zero; } - for (int i = 0; i < Num; i++) { - Split.ToFix.push_back(BinaryOperator::Create(Reverse, Zero, Comp, "", I)); // shifted the complement-fractional amount to the other side + + // shifted the fractional amount + if (Frac != Zero && L != Zero) { + L = CopyDebug(BinaryOperator::Create(Opcode, L, Frac, "", I), I); } - for (int i = 0; i < Num; i++) { - int j = i + ShiftChunks; - int k = i + ShiftChunks + Dir; - Split.Chunks.push_back(BinaryOperator::Create(Instruction::Or, - 0 <= j && j < Num ? Split.ToFix[ j] : Zero, - 0 <= k && k < Num ? Split.ToFix[Num+k] : Zero, - "", I)); // shifted the complement-fractional amount to the other side + // shifted the complement-fractional amount to the other side + if (Comp != Zero && H != Zero) { + H = CopyDebug(BinaryOperator::Create(Reverse, H, Comp, "", I), I); + } + + // Or the parts together. Since we may have zero, try to fold it away. + if (Value *V = SimplifyBinOp(Instruction::Or, L, H, DL)) { + Chunks.push_back(V); + } else { + Chunks.push_back(CopyDebug(BinaryOperator::Create(Instruction::Or, L, H, "", I), I)); } } } break; } case Instruction::ICmp: { - ICmpInst *CE = dyn_cast<ICmpInst>(I); + ICmpInst *CE = cast<ICmpInst>(I); ICmpInst::Predicate Pred = CE->getPredicate(); + ChunksVec LeftChunks = getChunks(I->getOperand(0)); + ChunksVec RightChunks = getChunks(I->getOperand(1)); switch (Pred) { case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_NE: { ICmpInst::Predicate PartPred; // the predicate to use on each of the parts llvm::Instruction::BinaryOps CombineOp; // the predicate to use to combine the subcomparisons - int Num = getNumChunks(I->getOperand(0)->getType()); + int Num = LeftChunks.size(); if (Pred == ICmpInst::ICMP_EQ) { PartPred = ICmpInst::ICMP_EQ; CombineOp = Instruction::And; @@ -453,14 +562,13 @@ void ExpandI64::splitInst(Instruction *I, DataLayout& DL) { PartPred = ICmpInst::ICMP_NE; CombineOp = Instruction::Or; } - for (int i = 0; i < Num; i++) { - Split.ToFix.push_back(CopyDebug(new ICmpInst(I, PartPred, Zero, Zero), I)); - } // first combine 0 and 1. then combine that with 2, etc. - Split.ToFix.push_back(CopyDebug(BinaryOperator::Create(CombineOp, Split.ToFix[0], Split.ToFix[1], "", I), I)); - for (int i = 2; i < Num; i++) { - Split.ToFix.push_back(CopyDebug(BinaryOperator::Create(CombineOp, Split.ToFix[i], Split.ToFix[Split.ToFix.size()-1], "", I), I)); + Value *Combined = NULL; + for (int i = 0; i < Num; i++) { + Value *Cmp = CopyDebug(new ICmpInst(I, PartPred, LeftChunks[i], RightChunks[i]), I); + Combined = !Combined ? Cmp : CopyDebug(BinaryOperator::Create(CombineOp, Combined, Cmp, "", I), I); } + I->replaceAllUsesWith(Combined); break; } case ICmpInst::ICMP_ULT: @@ -486,16 +594,12 @@ void ExpandI64::splitInst(Instruction *I, DataLayout& DL) { case ICmpInst::ICMP_UGT: break; default: assert(0); } - A = CopyDebug(new ICmpInst(I, StrictPred, Zero, Zero), I); - B = CopyDebug(new ICmpInst(I, ICmpInst::ICMP_EQ, Zero, Zero), I); - C = CopyDebug(new ICmpInst(I, UnsignedPred, Zero, Zero), I); + A = CopyDebug(new ICmpInst(I, StrictPred, LeftChunks[1], RightChunks[1]), I); + B = CopyDebug(new ICmpInst(I, ICmpInst::ICMP_EQ, LeftChunks[1], RightChunks[1]), I); + C = CopyDebug(new ICmpInst(I, UnsignedPred, LeftChunks[0], RightChunks[0]), I); D = CopyDebug(BinaryOperator::Create(Instruction::And, B, C, "", I), I); Final = CopyDebug(BinaryOperator::Create(Instruction::Or, A, D, "", I), I); - Split.ToFix.push_back(A); - Split.ToFix.push_back(B); - Split.ToFix.push_back(C); - // D is NULL or a logical operator, no need to fix it - Split.ToFix.push_back(Final); + I->replaceAllUsesWith(Final); break; } default: assert(0); @@ -503,48 +607,54 @@ void ExpandI64::splitInst(Instruction *I, DataLayout& DL) { break; } case Instruction::Select: { - Value *Cond = I->getOperand(0); - int Num = getNumChunks(I->getOperand(1)->getType()); - for (int i = 0; i < Num; i++) { - Instruction *C = CopyDebug(SelectInst::Create(Cond, Zero, Zero, "", I), I); // will be fixed - Split.Chunks.push_back(C); + SelectInst *SI = cast<SelectInst>(I); + Value *Cond = SI->getCondition(); + ChunksVec TrueChunks = getChunks(SI->getTrueValue()); + ChunksVec FalseChunks = getChunks(SI->getFalseValue()); + unsigned Num = getNumChunks(I->getType()); + for (unsigned i = 0; i < Num; i++) { + Instruction *Part = CopyDebug(SelectInst::Create(Cond, TrueChunks[i], FalseChunks[i], "", I), I); + Chunks.push_back(Part); } break; } case Instruction::PHI: { + PHINode *Parent = cast<PHINode>(I); int Num = getNumChunks(I->getType()); - PHINode *Parent = dyn_cast<PHINode>(I); int PhiNum = Parent->getNumIncomingValues(); for (int i = 0; i < Num; i++) { - PHINode *P = PHINode::Create(i32, PhiNum, "", I); CopyDebug(P, I); - for (int j = 0; j < PhiNum; j++) { - P->addIncoming(Zero, Parent->getIncomingBlock(j)); // will be fixed - } - Split.Chunks.push_back(P); + Instruction *P = CopyDebug(PHINode::Create(i32, PhiNum, "", I), I); + Chunks.push_back(P); } + // PHI node operands may not be translated yet; we'll handle them at the end. + Phis.push_back(Parent); break; } case Instruction::And: case Instruction::Or: case Instruction::Xor: { - Instruction::BinaryOps Op; - switch (I->getOpcode()) { // XXX why does llvm make us do this? - case Instruction::And: Op = Instruction::And; break; - case Instruction::Or: Op = Instruction::Or; break; - case Instruction::Xor: Op = Instruction::Xor; break; - } - int Num = getNumChunks(I->getType()); + BinaryOperator *BO = cast<BinaryOperator>(I); + ChunksVec LeftChunks = getChunks(BO->getOperand(0)); + ChunksVec RightChunks = getChunks(BO->getOperand(1)); + int Num = getNumChunks(BO->getType()); for (int i = 0; i < Num; i++) { - Split.Chunks.push_back(CopyDebug(BinaryOperator::Create(Op, Zero, Zero, "", I), I)); + // If there's a constant operand, it's likely enough that one of the + // chunks will be a trivial operation, so it's worth calling + // SimplifyBinOp here. + if (Value *V = SimplifyBinOp(BO->getOpcode(), LeftChunks[i], RightChunks[i], DL)) { + Chunks.push_back(V); + } else { + Chunks.push_back(CopyDebug(BinaryOperator::Create(BO->getOpcode(), LeftChunks[i], RightChunks[i], "", BO), BO)); + } } break; } case Instruction::Call: { - CallInst *CI = dyn_cast<CallInst>(I); + CallInst *CI = cast<CallInst>(I); Function *F = CI->getCalledFunction(); if (F) { assert(okToRemainIllegal(F)); - break; + return false; } Value *CV = CI->getCalledValue(); FunctionType *OFT = NULL; @@ -552,7 +662,8 @@ void ExpandI64::splitInst(Instruction *I, DataLayout& DL) { assert(CE); assert(CE->getOpcode() == Instruction::BitCast); OFT = cast<FunctionType>(cast<PointerType>(CE->getType())->getElementType()); - CV = CE->getOperand(0); // we are legalizing the arguments now, so no need to bitcast any more + Constant *C = CE->getOperand(0); + CV = ConstantExpr::getBitCast(C, getLegalizedFunctionType(OFT)->getPointerTo()); } else { // this is a function pointer call OFT = cast<FunctionType>(cast<PointerType>(CV->getType())->getElementType()); @@ -568,8 +679,9 @@ void ExpandI64::splitInst(Instruction *I, DataLayout& DL) { Args.push_back(CI->getArgOperand(i)); } else { assert(T == i64); - Args.push_back(Zero); // will be fixed - Args.push_back(Zero); + ChunksVec ArgChunks = getChunks(CI->getArgOperand(i)); + Args.push_back(ArgChunks[0]); + Args.push_back(ArgChunks[1]); } } Instruction *L = CopyDebug(CallInst::Create(CV, Args, "", I), I); @@ -579,9 +691,11 @@ void ExpandI64::splitInst(Instruction *I, DataLayout& DL) { assert(I->getType() == i64); ensureFuncs(); H = CopyDebug(CallInst::Create(GetHigh, "", I), I); + Chunks.push_back(L); + Chunks.push_back(H); + } else { + I->replaceAllUsesWith(L); } - Split.Chunks.push_back(L); - Split.Chunks.push_back(H); break; } case Instruction::FPToUI: @@ -599,14 +713,14 @@ void ExpandI64::splitInst(Instruction *I, DataLayout& DL) { L = CopyDebug(CallInst::Create(DtoILow, Args, "", I), I); H = CopyDebug(CallInst::Create(DtoIHigh, Args, "", I), I); } - Split.Chunks.push_back(L); - Split.Chunks.push_back(H); + Chunks.push_back(L); + Chunks.push_back(H); break; } case Instruction::BitCast: { if (I->getType() == Type::getDoubleTy(TheModule->getContext())) { // fall through to itofp - } else { + } else if (I->getOperand(0)->getType() == Type::getDoubleTy(TheModule->getContext())) { // double to i64 assert(I->getType() == i64); ensureFuncs(); @@ -614,8 +728,13 @@ void ExpandI64::splitInst(Instruction *I, DataLayout& DL) { Args.push_back(I->getOperand(0)); Instruction *L = CopyDebug(CallInst::Create(BDtoILow, Args, "", I), I); Instruction *H = CopyDebug(CallInst::Create(BDtoIHigh, Args, "", I), I); - Split.Chunks.push_back(L); - Split.Chunks.push_back(H); + Chunks.push_back(L); + Chunks.push_back(H); + break; + } else { + // no-op bitcast + assert(I->getType() == I->getOperand(0)->getType()); + Chunks = getChunks(I->getOperand(0)); break; } } @@ -623,9 +742,7 @@ void ExpandI64::splitInst(Instruction *I, DataLayout& DL) { case Instruction::UIToFP: { assert(I->getOperand(0)->getType() == i64); ensureFuncs(); - SmallVector<Value *, 2> Args; - Args.push_back(Zero); - Args.push_back(Zero); + ChunksVec InputChunks = getChunks(I->getOperand(0)); Function *F; switch (I->getOpcode()) { case Instruction::SIToFP: F = I->getType() == Type::getDoubleTy(TheModule->getContext()) ? SItoD : SItoF; break; @@ -637,12 +754,14 @@ void ExpandI64::splitInst(Instruction *I, DataLayout& DL) { } default: assert(0); } - Instruction *D = CopyDebug(CallInst::Create(F, Args, "", I), I); - Split.ToFix.push_back(D); + Instruction *D = CopyDebug(CallInst::Create(F, InputChunks, "", I), I); + I->replaceAllUsesWith(D); break; } case Instruction::Switch: { assert(I->getOperand(0)->getType() == i64); + ChunksVec InputChunks = getChunks(I->getOperand(0)); + // do a switch on the lower 32 bits, into a different basic block for each target, then do a branch in each of those on the high 32 bits SwitchInst* SI = cast<SwitchInst>(I); BasicBlock *DD = SI->getDefaultDest(); @@ -653,9 +772,8 @@ void ExpandI64::splitInst(Instruction *I, DataLayout& DL) { for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) { NumItems += i.getCaseValueEx().getNumItems(); } - SwitchInst *LowSI = SwitchInst::Create(Zero, DD, NumItems, I); // same default destination: if lower bits do not match, go straight to default + SwitchInst *LowSI = SwitchInst::Create(InputChunks[0], DD, NumItems, I); // same default destination: if lower bits do not match, go straight to default CopyDebug(LowSI, I); - Split.ToFix.push_back(LowSI); typedef std::pair<uint32_t, BasicBlock*> Pair; typedef std::vector<Pair> Vec; // vector of pairs of high 32 bits, basic block @@ -689,14 +807,13 @@ void ExpandI64::splitInst(Instruction *I, DataLayout& DL) { /*if (V.size() == 1) { // just one option, create a branch - Instruction *CheckHigh = CopyDebug(new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, Zero, ConstantInt::get(i32, V[0]->first)), I); + Instruction *CheckHigh = CopyDebug(new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, InputChunks[1], ConstantInt::get(i32, V[0]->first)), I); Split.ToFix.push_back(CheckHigh); CopyDebug(BranchInst::Create(V[0]->second, DD, CheckHigh, NewBB), I); } else {*/ // multiple options, create a switch - we could also optimize and make an icmp/branch if just one, as in commented code above - SwitchInst *HighSI = SwitchInst::Create(Zero, DD, V.size(), NewBB); // same default destination: if lower bits do not match, go straight to default - Split.ToFix.push_back(HighSI); + SwitchInst *HighSI = SwitchInst::Create(InputChunks[1], DD, V.size(), NewBB); // same default destination: if lower bits do not match, go straight to default for (unsigned i = 0; i < V.size(); i++) { BasicBlock *BB = V[i].second; HighSI->addCase(cast<ConstantInt>(ConstantInt::get(i32, V[i].first)), BB); @@ -723,285 +840,42 @@ void ExpandI64::splitInst(Instruction *I, DataLayout& DL) { break; } default: { - dumpIR(I); + I->dump(); assert(0 && "some i64 thing we can't legalize yet"); } } + + return true; } ChunksVec ExpandI64::getChunks(Value *V) { + assert(isIllegal(V->getType())); + Type *i32 = Type::getInt32Ty(V->getContext()); Value *Zero = ConstantInt::get(i32, 0); - int Num = getNumChunks(V->getType()); + unsigned Num = getNumChunks(V->getType()); if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - uint64_t C = CI->getZExtValue(); + APInt C = CI->getValue(); ChunksVec Chunks; - Chunks.push_back(ConstantInt::get(i32, (uint32_t)C)); - Chunks.push_back(ConstantInt::get(i32, (uint32_t)(C >> 32))); - for (int i = 2; i < Num; i++) { - Chunks.push_back(Zero); + for (unsigned i = 0; i < Num; i++) { + Chunks.push_back(ConstantInt::get(i32, C.trunc(32))); + C = C.lshr(32); } return Chunks; } else if (Instruction *I = dyn_cast<Instruction>(V)) { assert(Splits.find(I) != Splits.end()); - return Splits[I].Chunks; + return Splits[I]; } else if (isa<UndefValue>(V)) { ChunksVec Chunks; - for (int i = 0; i < Num; i++) { + for (unsigned i = 0; i < Num; i++) { Chunks.push_back(Zero); } return Chunks; } else { - assert(SplitArgs.find(V) != SplitArgs.end()); - return SplitArgs[V]; - } -} - -void ExpandI64::finalizeInst(Instruction *I) { - SplitInfo &Split = Splits[I]; - switch (I->getOpcode()) { - case Instruction::Load: - case Instruction::SExt: - case Instruction::FPToUI: - case Instruction::FPToSI: { - break; // input was legal - } - case Instruction::ZExt: { - Value *Input = I->getOperand(0); - unsigned InputBits = Input->getType()->getIntegerBitWidth(); - unsigned Num = getNumChunks(I->getOperand(0)->getType()); - Type *i32 = Type::getInt32Ty(TheModule->getContext()); - Constant *Zero = Constant::getNullValue(i32); - if (InputBits <= 32) { - cast<Instruction>(Split.Chunks[0])->setOperand(0, I->getOperand(0)); - for (unsigned i = 1; i < Num; i++) { - cast<Instruction>(Split.Chunks[i])->setOperand(0, Zero); - } - } else { - ChunksVec Chunks = getChunks(I->getOperand(0)); - for (unsigned i = 0; i < Num; i++) { - cast<Instruction>(Split.Chunks[i])->setOperand(0, i < Chunks.size() ? Chunks[i] : Zero); - } - } - break; - } - case Instruction::Trunc: { - ChunksVec Chunks = getChunks(I->getOperand(0)); - unsigned Bits = I->getType()->getIntegerBitWidth(); - if (Bits < 32) { - Instruction *L = Split.ToFix[0]; - L->setOperand(0, Chunks[0]); - I->replaceAllUsesWith(L); - } else if (Bits == 32) { - I->replaceAllUsesWith(Chunks[0]); // just use the lower 32 bits and you're set - } else { - int Num = getNumChunks(I->getType()); - for (int i = 0; i < Num; i++) { - cast<Instruction>(Split.Chunks[i])->setOperand(0, Chunks[i]); - } - } - break; - } - case Instruction::Store: - case Instruction::Ret: { - // generic fix of an instruction with one illegal input in operand 0, and consisting of legal instructions - ChunksVec Chunks = getChunks(I->getOperand(0)); - int Num = getNumChunks(I->getOperand(0)->getType()); - for (int i = 0; i < Num; i++) { - Split.ToFix[i]->setOperand(0, Chunks[i]); - } - break; - } - case Instruction::BitCast: { - if (I->getType() == Type::getDoubleTy(TheModule->getContext())) { - // fall through to itofp - } else { - break; // input was legal - } - } - case Instruction::SIToFP: - case Instruction::UIToFP: { - // generic fix of an instruction with one 64-bit input, and a legal output - ChunksVec Chunks = getChunks(I->getOperand(0)); - Instruction *D = Split.ToFix[0]; - D->setOperand(0, Chunks[0]); - D->setOperand(1, Chunks[1]); - I->replaceAllUsesWith(D); - break; - } - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - case Instruction::SDiv: - case Instruction::UDiv: - case Instruction::SRem: - case Instruction::URem: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::Shl: { - ChunksVec Left = getChunks(I->getOperand(0)); - ChunksVec Right = getChunks(I->getOperand(1)); - int Num = getNumChunks(I->getOperand(0)->getType()); - if (Num == 2) { - CallInst *Call = dyn_cast<CallInst>(Split.Chunks[0]); - if (Call) { - Call->setOperand(0, Left[0]); - Call->setOperand(1, Left[1]); - Call->setOperand(2, Right[0]); - Call->setOperand(3, Right[1]); - } else { - // optimized case, 32-bit shifts - switch (I->getOpcode()) { - case Instruction::LShr: { - cast<Instruction>(Split.Chunks[0])->setOperand(0, Left[1]); - break; - } - case Instruction::Shl: { - cast<Instruction>(Split.Chunks[1])->setOperand(0, Left[0]); - break; - } - default: assert(0); - } - } - } else { - // more than 32 bits. handle simple shifts for lshr and shl - ConstantInt *CI = cast<ConstantInt>(I->getOperand(1)); - unsigned Shifts = CI->getZExtValue(); - if (Shifts % 32 == 0) { - unsigned ChunkShifts = Shifts/32; - int Move; - switch (I->getOpcode()) { - case Instruction::LShr: Move = ChunkShifts; break; // XXX verify this is not off-by-one - case Instruction::Shl: Move = -ChunkShifts; break; - default: assert(0); - } - for (int i = 0; i < Num; i++) { - int j = i + Move; - if (0 <= j && j < Num) { - cast<Instruction>(Split.Chunks[i])->setOperand(0, Left[j]); - } - // otherwise it was already zero, which is correct - } - } else { - // not a simple multiple of 32. - for (int i = 0; i < Num; i++) { - cast<Instruction>(Split.ToFix[i])->setOperand(0, Left[i]); - cast<Instruction>(Split.ToFix[Num+i])->setOperand(0, Left[i]); - } - } - } - break; - } - case Instruction::ICmp: { - ChunksVec Left = getChunks(I->getOperand(0)); - ChunksVec Right = getChunks(I->getOperand(1)); - ICmpInst *CE = dyn_cast<ICmpInst>(I); - ICmpInst::Predicate Pred = CE->getPredicate(); - switch (Pred) { - case ICmpInst::ICMP_EQ: - case ICmpInst::ICMP_NE: { - int Num = getNumChunks(I->getOperand(0)->getType()); - for (int i = 0; i < Num; i++) { - Split.ToFix[i]->setOperand(0, Left[i]); - Split.ToFix[i]->setOperand(1, Right[i]); - } - I->replaceAllUsesWith(Split.ToFix[Split.ToFix.size()-1]); - break; - } - default: { - Instruction *A = Split.ToFix[0]; - Instruction *B = Split.ToFix[1]; - Instruction *C = Split.ToFix[2]; - Instruction *Final = Split.ToFix[3]; - A->setOperand(0, Left[1]); A->setOperand(1, Right[1]); - B->setOperand(0, Left[1]); B->setOperand(1, Right[1]); - C->setOperand(0, Left[0]); C->setOperand(1, Right[0]); - I->replaceAllUsesWith(Final); - break; - } - } - break; - } - case Instruction::Select: { - int Num = getNumChunks(I->getOperand(1)->getType()); - ChunksVec True = getChunks(I->getOperand(1)); - ChunksVec False = getChunks(I->getOperand(2)); - for (int i = 0; i < Num; i++) { - cast<Instruction>(Split.Chunks[i])->setOperand(1, True[i]); - cast<Instruction>(Split.Chunks[i])->setOperand(2, False[i]); - } - break; - } - case Instruction::PHI: { - int Num = getNumChunks(I->getType()); - PHINode *Parent = dyn_cast<PHINode>(I); - int PhiNum = Parent->getNumIncomingValues(); - for (int i = 0; i < Num; i++) { - PHINode *P = cast<PHINode>(Split.Chunks[i]); - for (int j = 0; j < PhiNum; j++) { - ChunksVec Chunks = getChunks(Parent->getIncomingValue(j)); - P->setIncomingValue(j, Chunks[i]); - } - } - break; - } - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { - ChunksVec Left = getChunks(I->getOperand(0)); - ChunksVec Right = getChunks(I->getOperand(1)); - int Num = getNumChunks(I->getType()); - for (int i = 0; i < Num; i++) { - Instruction *Chunk = cast<Instruction>(Split.Chunks[i]); - Chunk->setOperand(0, Left[i]); - Chunk->setOperand(1, Right[i]); - } - break; - } - case Instruction::Call: { - if (Split.Chunks.size() == 0) break; // was not legalized - - Instruction *L = cast<Instruction>(Split.Chunks[0]); - // H doesn't need to be fixed, it's just a call to getHigh - - // fill in split parts of illegals - CallInst *CI = dyn_cast<CallInst>(L); - CallInst *OCI = dyn_cast<CallInst>(I); - int Num = OCI->getNumArgOperands(); - for (int i = 0, j = 0; i < Num; i++, j++) { - if (isIllegal(OCI->getArgOperand(i)->getType())) { - ChunksVec Chunks = getChunks(OCI->getArgOperand(i)); - CI->setArgOperand(j, Chunks[0]); - CI->setArgOperand(j + 1, Chunks[1]); - j++; - } - } - if (!isIllegal(I->getType())) { - // legal return value, so just replace the old call with the new call - I->replaceAllUsesWith(L); - } - break; - } - case Instruction::Switch: { - SwitchInst *SI = dyn_cast<SwitchInst>(I); - ChunksVec Chunks = getChunks(SI->getCondition()); - SwitchInst *NewSI = dyn_cast<SwitchInst>(Split.ToFix[0]); - NewSI->setCondition(Chunks[0]); - unsigned Num = Split.ToFix.size(); - for (unsigned i = 1; i < Num; i++) { - Instruction *Curr = Split.ToFix[i]; - if (SwitchInst *SI = dyn_cast<SwitchInst>(Curr)) { - SI->setCondition(Chunks[1]); - } else { - assert(0); - Split.ToFix[i]->setOperand(0, Chunks[1]); - } - } - break; - } - default: dumpIR(I); assert(0); + assert(Splits.find(V) != Splits.end()); + return Splits[V]; } } @@ -1099,9 +973,10 @@ void ExpandI64::ensureFuncs() { bool ExpandI64::runOnModule(Module &M) { TheModule = &M; + DL = &getAnalysis<DataLayout>(); + Splits.clear(); + Changed = false; - bool Changed = false; - DataLayout DL(&M); // pre pass - legalize functions for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E; ) { Function *Func = Iter++; @@ -1109,92 +984,70 @@ bool ExpandI64::runOnModule(Module &M) { } // first pass - split - for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E; ) { - Function *Func = Iter++; - for (Function::iterator BB = Func->begin(), E = Func->end(); - BB != E; - ++BB) { + DeadVec Dead; + for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E; ++Iter) { + Function *Func = Iter; + if (Func->isDeclaration()) { + continue; + } + + // Walk the body of the function. We use reverse postorder so that we visit + // all operands of an instruction before the instruction itself. The + // exception to this is PHI nodes, which we put on a list and handle below. + ReversePostOrderTraversal<Function*> RPOT(Func); + for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(), + RE = RPOT.end(); RI != RE; ++RI) { + BasicBlock *BB = *RI; for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); Iter != E; ) { Instruction *I = Iter++; - //dump("consider"); dumpIR(I); - // FIXME: this could be optimized, we don't need all Num for all instructions - int Num = I->getNumOperands(); - for (int i = -1; i < Num; i++) { // -1 is the type of I itself - Type *T = i == -1 ? I->getType() : I->getOperand(i)->getType(); - if (isIllegal(T)) { + if (!isLegalInstruction(I)) { + if (splitInst(I)) { Changed = true; - splitInst(I, DL); - break; + Dead.push_back(I); } } } } - } - // second pass pass - finalize and connect - if (Changed) { - // Finalize each element - for (SplitsMap::iterator I = Splits.begin(); I != Splits.end(); I++) { - //dump("finalize"); dumpIR(I->first); - finalizeInst(I->first); - } - // Remove original illegal values - if (!getenv("I64DEV")) { // XXX during development - // First, unlink them - Type *i64 = Type::getInt64Ty(TheModule->getContext()); - Value *Zero = Constant::getNullValue(i64); - for (SplitsMap::iterator I = Splits.begin(); I != Splits.end(); I++) { - //dump("unlink"); dumpIR(I->first); - int Num = I->first->getNumOperands(); - for (int i = 0; i < Num; i++) { // -1 is the type of I itself - Value *V = I->first->getOperand(i); - Type *T = V->getType(); - if (isIllegal(T)) { - I->first->setOperand(i, Zero); - } + // Fix up PHI node operands. + while (!Phis.empty()) { + PHINode *PN = Phis.pop_back_val(); + ChunksVec OutputChunks = getChunks(PN); + for (unsigned j = 0, je = PN->getNumIncomingValues(); j != je; ++j) { + Value *Op = PN->getIncomingValue(j); + ChunksVec InputChunks = getChunks(Op); + for (unsigned k = 0, ke = OutputChunks.size(); k != ke; ++k) { + PHINode *NewPN = cast<PHINode>(OutputChunks[k]); + NewPN->addIncoming(InputChunks[k], PN->getIncomingBlock(j)); } } + PN->dropAllReferences(); + } - // Now actually remove them - for (SplitsMap::iterator I = Splits.begin(); I != Splits.end(); I++) { - //dump("delete"); dumpIR(I->first); - I->first->eraseFromParent(); - } + // Delete instructions which were replaced. We do this after the full walk + // of the instructions so that all uses are replaced first. + while (!Dead.empty()) { + Instruction *D = Dead.pop_back_val(); + D->eraseFromParent(); } } - // post pass - clean up illegal functions that were legalized + // post pass - clean up illegal functions that were legalized. We do this + // after the full walk of the functions so that all uses are replaced first. for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E; ) { Function *Func = Iter++; removeIllegalFunc(Func); } - // remove bitcasts that were introduced while legalizing functions - for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E; ) { - Function *Func = Iter++; - for (Function::iterator BB = Func->begin(), E = Func->end(); - BB != E; - ++BB) { - for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); - Iter != E; ) { - Instruction *I = Iter++; - unsigned Opcode = I->getOpcode(); - if (Opcode == Instruction::BitCast || Opcode == Instruction::PtrToInt) { - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(I->getOperand(0))) { - assert(CE->getOpcode() == Instruction::BitCast); - assert(isa<FunctionType>(cast<PointerType>(CE->getType())->getElementType())); - I->setOperand(0, CE->getOperand(0)); - } - } - } - } - } - return Changed; } +void ExpandI64::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DataLayout>(); + ModulePass::getAnalysisUsage(AU); +} + ModulePass *llvm::createExpandI64Pass() { return new ExpandI64(); } - diff --git a/test/Transforms/NaCl/expand-i64.ll b/test/Transforms/NaCl/expand-i64.ll new file mode 100644 index 0000000000..2997264058 --- /dev/null +++ b/test/Transforms/NaCl/expand-i64.ll @@ -0,0 +1,272 @@ +; RUN: opt -S -expand-illegal-ints < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32" + +; CHECK: define i32 @add(i32, i32, i32, i32) { +; CHECK: %5 = call i32 @i64Add(i32 %0, i32 %1, i32 %2, i32 %3) +; CHECK: %6 = call i32 @getHigh32() +; CHECK: call void @setHigh32(i32 %6) +; CHECK: ret i32 %5 +; CHECK: } +define i64 @add(i64 %a, i64 %b) { + %c = add i64 %a, %b + ret i64 %c +} + +; CHECK: define i32 @sub(i32, i32, i32, i32) { +; CHECK: %5 = call i32 @i64Subtract(i32 %0, i32 %1, i32 %2, i32 %3) +; CHECK: %6 = call i32 @getHigh32() +; CHECK: call void @setHigh32(i32 %6) +; CHECK: ret i32 %5 +; CHECK: } +define i64 @sub(i64 %a, i64 %b) { + %c = sub i64 %a, %b + ret i64 %c +} + +; CHECK: define i32 @mul(i32, i32, i32, i32) { +; CHECK: %5 = call i32 @__muldi3(i32 %0, i32 %1, i32 %2, i32 %3) +; CHECK: %6 = call i32 @getHigh32() +; CHECK: call void @setHigh32(i32 %6) +; CHECK: ret i32 %5 +; CHECK: } +define i64 @mul(i64 %a, i64 %b) { + %c = mul i64 %a, %b + ret i64 %c +} + +; CHECK: define i32 @sdiv(i32, i32, i32, i32) { +; CHECK: %5 = call i32 @__divdi3(i32 %0, i32 %1, i32 %2, i32 %3) +; CHECK: %6 = call i32 @getHigh32() +; CHECK: call void @setHigh32(i32 %6) +; CHECK: ret i32 %5 +; CHECK: } +define i64 @sdiv(i64 %a, i64 %b) { + %c = sdiv i64 %a, %b + ret i64 %c +} + +; CHECK: define i32 @udiv(i32, i32, i32, i32) { +; CHECK: %5 = call i32 @__udivdi3(i32 %0, i32 %1, i32 %2, i32 %3) +; CHECK: %6 = call i32 @getHigh32() +; CHECK: call void @setHigh32(i32 %6) +; CHECK: ret i32 %5 +; CHECK: } +define i64 @udiv(i64 %a, i64 %b) { + %c = udiv i64 %a, %b + ret i64 %c +} + +; CHECK: define i32 @srem(i32, i32, i32, i32) { +; CHECK: %5 = call i32 @__remdi3(i32 %0, i32 %1, i32 %2, i32 %3) +; CHECK: %6 = call i32 @getHigh32() +; CHECK: call void @setHigh32(i32 %6) +; CHECK: ret i32 %5 +; CHECK: } +define i64 @srem(i64 %a, i64 %b) { + %c = srem i64 %a, %b + ret i64 %c +} + +; CHECK: define i32 @urem(i32, i32, i32, i32) { +; CHECK: %5 = call i32 @__uremdi3(i32 %0, i32 %1, i32 %2, i32 %3) +; CHECK: %6 = call i32 @getHigh32() +; CHECK: call void @setHigh32(i32 %6) +; CHECK: ret i32 %5 +; CHECK: } +define i64 @urem(i64 %a, i64 %b) { + %c = urem i64 %a, %b + ret i64 %c +} + +; CHECK: define i32 @and(i32, i32, i32, i32) { +; CHECK: %5 = and i32 %0, %2 +; CHECK: %6 = and i32 %1, %3 +; CHECK: call void @setHigh32(i32 %6) +; CHECK: ret i32 %5 +; CHECK: } +define i64 @and(i64 %a, i64 %b) { + %c = and i64 %a, %b + ret i64 %c +} + +; CHECK: define i32 @or(i32, i32, i32, i32) { +; CHECK: %5 = or i32 %0, %2 +; CHECK: %6 = or i32 %1, %3 +; CHECK: call void @setHigh32(i32 %6) +; CHECK: ret i32 %5 +; CHECK: } +define i64 @or(i64 %a, i64 %b) { + %c = or i64 %a, %b + ret i64 %c +} + +; CHECK: define i32 @xor(i32, i32, i32, i32) { +; CHECK: %5 = xor i32 %0, %2 +; CHECK: %6 = xor i32 %1, %3 +; CHECK: call void @setHigh32(i32 %6) +; CHECK: ret i32 %5 +; CHECK: } +define i64 @xor(i64 %a, i64 %b) { + %c = xor i64 %a, %b + ret i64 %c +} + +; CHECK: define i32 @lshr(i32, i32, i32, i32) { +; CHECK: %5 = call i32 @bitshift64Lshr(i32 %0, i32 %1, i32 %2, i32 %3) +; CHECK: %6 = call i32 @getHigh32() +; CHECK: call void @setHigh32(i32 %6) +; CHECK: ret i32 %5 +; CHECK: } +define i64 @lshr(i64 %a, i64 %b) { + %c = lshr i64 %a, %b + ret i64 %c +} + +; CHECK: define i32 @ashr(i32, i32, i32, i32) { +; CHECK: %5 = call i32 @bitshift64Ashr(i32 %0, i32 %1, i32 %2, i32 %3) +; CHECK: %6 = call i32 @getHigh32() +; CHECK: call void @setHigh32(i32 %6) +; CHECK: ret i32 %5 +; CHECK: } +define i64 @ashr(i64 %a, i64 %b) { + %c = ashr i64 %a, %b + ret i64 %c +} + +; CHECK: define i32 @shl(i32, i32, i32, i32) { +; CHECK: %5 = call i32 @bitshift64Shl(i32 %0, i32 %1, i32 %2, i32 %3) +; CHECK: %6 = call i32 @getHigh32() +; CHECK: call void @setHigh32(i32 %6) +; CHECK: ret i32 %5 +; CHECK: } +define i64 @shl(i64 %a, i64 %b) { + %c = shl i64 %a, %b + ret i64 %c +} + + +; CHECK: define i32 @icmp_eq(i32, i32, i32, i32) { +; CHECK: %5 = icmp eq i32 %0, %2 +; CHECK: %6 = icmp eq i32 %1, %3 +; CHECK: %7 = and i1 %5, %6 +; CHECK: %d = zext i1 %7 to i32 +; CHECK: ret i32 %d +; CHECK: } +define i32 @icmp_eq(i64 %a, i64 %b) { + %c = icmp eq i64 %a, %b + %d = zext i1 %c to i32 + ret i32 %d +} + +; CHECK: define i32 @icmp_ne(i32, i32, i32, i32) { +; CHECK: %5 = icmp ne i32 %0, %2 +; CHECK: %6 = icmp ne i32 %1, %3 +; CHECK: %7 = or i1 %5, %6 +; CHECK: %d = zext i1 %7 to i32 +; CHECK: ret i32 %d +; CHECK: } +define i32 @icmp_ne(i64 %a, i64 %b) { + %c = icmp ne i64 %a, %b + %d = zext i1 %c to i32 + ret i32 %d +} + +; CHECK: define i32 @icmp_slt(i32, i32, i32, i32) { +; CHECK: %5 = icmp slt i32 %1, %3 +; CHECK: %6 = icmp eq i32 %1, %3 +; CHECK: %7 = icmp ult i32 %0, %2 +; CHECK: %8 = and i1 %6, %7 +; CHECK: %9 = or i1 %5, %8 +; CHECK: %d = zext i1 %9 to i32 +; CHECK: ret i32 %d +; CHECK: } +define i32 @icmp_slt(i64 %a, i64 %b) { + %c = icmp slt i64 %a, %b + %d = zext i1 %c to i32 + ret i32 %d +} + +; CHECK: define i32 @icmp_ult(i32, i32, i32, i32) { +; CHECK: %5 = icmp ult i32 %1, %3 +; CHECK: %6 = icmp eq i32 %1, %3 +; CHECK: %7 = icmp ult i32 %0, %2 +; CHECK: %8 = and i1 %6, %7 +; CHECK: %9 = or i1 %5, %8 +; CHECK: %d = zext i1 %9 to i32 +; CHECK: ret i32 %d +; CHECK: } +define i32 @icmp_ult(i64 %a, i64 %b) { + %c = icmp ult i64 %a, %b + %d = zext i1 %c to i32 + ret i32 %d +} + +; CHECK: define i32 @load(i64* %a) { +; CHECK: %1 = ptrtoint i64* %a to i32 +; CHECK: %2 = inttoptr i32 %1 to i32* +; CHECK: %3 = load i32* %2 +; CHECK: %4 = add i32 %1, 4 +; CHECK: %5 = inttoptr i32 %4 to i32* +; CHECK: %6 = load i32* %5 +; CHECK: call void @setHigh32(i32 %6) +; CHECK: ret i32 %3 +; CHECK: } +define i64 @load(i64 *%a) { + %c = load i64* %a + ret i64 %c +} + +; CHECK: define void @store(i64* %a, i32, i32) { +; CHECK: %3 = ptrtoint i64* %a to i32 +; CHECK: %4 = inttoptr i32 %3 to i32* +; CHECK: store i32 %0, i32* %4 +; CHECK: %5 = add i32 %3, 4 +; CHECK: %6 = inttoptr i32 %5 to i32* +; CHECK: store i32 %1, i32* %6 +; CHECK: ret void +; CHECK: } +define void @store(i64 *%a, i64 %b) { + store i64 %b, i64* %a + ret void +} + +; CHECK: define i32 @call(i32, i32) { +; CHECK: %3 = call i32 @foo(i32 %0, i32 %1) +; CHECK: %4 = call i32 @getHigh32() +; CHECK: call void @setHigh32(i32 %4) +; CHECK: ret i32 %3 +; CHECK: } +declare i64 @foo(i64 %arg) +define i64 @call(i64 %arg) { + %ret = call i64 @foo(i64 %arg) + ret i64 %ret +} + +; CHECK: define i32 @trunc(i32, i32) { +; CHECK: ret i32 %0 +; CHECK: } +define i32 @trunc(i64 %x) { + %y = trunc i64 %x to i32 + ret i32 %y +} + +; CHECK: define i32 @zext(i32 %x) { +; CHECK: call void @setHigh32(i32 0) +; CHECK: ret i32 %x +; CHECK: } +define i64 @zext(i32 %x) { + %y = zext i32 %x to i64 + ret i64 %y +} + +; CHECK: define i32 @sext(i32 %x) { +; CHECK: %1 = icmp slt i32 %x, 0 +; CHECK: %2 = sext i1 %1 to i32 +; CHECK: call void @setHigh32(i32 %2) +; CHECK: ret i32 %x +; CHECK: } +define i64 @sext(i32 %x) { + %y = sext i32 %x to i64 + ret i64 %y +} |