Diffstat (limited to 'lib/Transforms/Scalar')
33 files changed, 1527 insertions, 5038 deletions
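Much of the CodeGenPrepare churn in the diff below swaps the old monolithic Attributes queries for the index-based AttributeSet API and moves headers under llvm/IR/. A minimal sketch of the new query style, using the exact calls that appear in the CodeGenPrepare hunk (the wrapper function isOptimizedForSize is a hypothetical helper added here only for illustration, not part of the commit):

    // Illustrative only: querying a function attribute after this change.
    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    static bool isOptimizedForSize(const Function &F) {
      // Old API, removed by this commit:
      //   F.getFnAttributes().hasAttribute(Attributes::OptimizeForSize)
      // New API: attributes are looked up by slot index on the AttributeSet.
      return F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                            Attribute::OptimizeForSize);
    }

Return-value attributes are queried the same way with AttributeSet::ReturnIndex, as the DupRetToEnableTailCallOpts hunk shows.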
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index f43baf5a76..a097308640 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -20,9 +20,9 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/BasicBlock.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" #include "llvm/Support/InstIterator.h" diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp index 6214e3b703..e755008808 100644 --- a/lib/Transforms/Scalar/BasicBlockPlacement.cpp +++ b/lib/Transforms/Scalar/BasicBlockPlacement.cpp @@ -30,7 +30,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileInfo.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" #include <set> diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index b3fc6e338c..fd55e082ac 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -21,7 +21,6 @@ add_llvm_library(LLVMScalarOpts LoopUnswitch.cpp LowerAtomic.cpp MemCpyOptimizer.cpp - ObjCARC.cpp Reassociate.cpp Reg2Mem.cpp SCCP.cpp diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index e6abfdf581..015fd2e6e6 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -23,14 +23,14 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Assembly/Writer.h" -#include "llvm/Constants.h" -#include "llvm/DataLayout.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/IRBuilder.h" -#include "llvm/InlineAsm.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" @@ -41,7 +41,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/BypassSlowDivision.h" @@ -106,6 +105,8 @@ namespace { } bool runOnFunction(Function &F); + const char *getPassName() const { return "CodeGen Prepare"; } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<DominatorTree>(); AU.addPreserved<ProfileInfo>(); @@ -148,11 +149,12 @@ bool CodeGenPrepare::runOnFunction(Function &F) { TLInfo = &getAnalysis<TargetLibraryInfo>(); DT = getAnalysisIfAvailable<DominatorTree>(); PFI = getAnalysisIfAvailable<ProfileInfo>(); - OptSize = F.getFnAttributes().hasAttribute(Attributes::OptimizeForSize); + OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize); /// This optimization identifies DIV instructions that can be /// profitably bypassed and carried out with a shorter, faster divide. 
- if (TLI && TLI->isSlowDivBypassed()) { + if (!OptSize && TLI && TLI->isSlowDivBypassed()) { const DenseMap<unsigned int, unsigned int> &BypassWidths = TLI->getBypassSlowDivWidths(); for (Function::iterator I = F.begin(); I != F.end(); I++) @@ -727,9 +729,9 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { // It's not safe to eliminate the sign / zero extension of the return value. // See llvm::isInTailCallPosition(). const Function *F = BB->getParent(); - Attributes CallerRetAttr = F->getAttributes().getRetAttributes(); - if (CallerRetAttr.hasAttribute(Attributes::ZExt) || - CallerRetAttr.hasAttribute(Attributes::SExt)) + AttributeSet CallerAttrs = F->getAttributes(); + if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || + CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) return false; // Make sure there are no instructions between the PHI and return, or that the @@ -786,11 +788,11 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { // Conservatively require the attributes of the call to match those of the // return. Ignore noalias because it doesn't affect the call sequence. - Attributes CalleeRetAttr = CS.getAttributes().getRetAttributes(); - if (AttrBuilder(CalleeRetAttr). - removeAttribute(Attributes::NoAlias) != - AttrBuilder(CallerRetAttr). - removeAttribute(Attributes::NoAlias)) + AttributeSet CalleeAttrs = CS.getAttributes(); + if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). + removeAttribute(Attribute::NoAlias) != + AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). + removeAttribute(Attribute::NoAlias)) continue; // Make sure the call instruction is followed by an unconditional branch to @@ -817,6 +819,629 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { // Memory Optimization //===----------------------------------------------------------------------===// +namespace { + +/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode +/// which holds actual Value*'s for register values. +struct ExtAddrMode : public TargetLowering::AddrMode { + Value *BaseReg; + Value *ScaledReg; + ExtAddrMode() : BaseReg(0), ScaledReg(0) {} + void print(raw_ostream &OS) const; + void dump() const; + + bool operator==(const ExtAddrMode& O) const { + return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) && + (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) && + (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale); + } +}; + +static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) { + AM.print(OS); + return OS; +} + +void ExtAddrMode::print(raw_ostream &OS) const { + bool NeedPlus = false; + OS << "["; + if (BaseGV) { + OS << (NeedPlus ? " + " : "") + << "GV:"; + WriteAsOperand(OS, BaseGV, /*PrintType=*/false); + NeedPlus = true; + } + + if (BaseOffs) + OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true; + + if (BaseReg) { + OS << (NeedPlus ? " + " : "") + << "Base:"; + WriteAsOperand(OS, BaseReg, /*PrintType=*/false); + NeedPlus = true; + } + if (Scale) { + OS << (NeedPlus ? " + " : "") + << Scale << "*"; + WriteAsOperand(OS, ScaledReg, /*PrintType=*/false); + NeedPlus = true; + } + + OS << ']'; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ExtAddrMode::dump() const { + print(dbgs()); + dbgs() << '\n'; +} +#endif + + +/// \brief A helper class for matching addressing modes. +/// +/// This encapsulates the logic for matching the target-legal addressing modes. 
+class AddressingModeMatcher { + SmallVectorImpl<Instruction*> &AddrModeInsts; + const TargetLowering &TLI; + + /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and + /// the memory instruction that we're computing this address for. + Type *AccessTy; + Instruction *MemoryInst; + + /// AddrMode - This is the addressing mode that we're building up. This is + /// part of the return value of this addressing mode matching stuff. + ExtAddrMode &AddrMode; + + /// IgnoreProfitability - This is set to true when we should not do + /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode + /// always returns true. + bool IgnoreProfitability; + + AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI, + const TargetLowering &T, Type *AT, + Instruction *MI, ExtAddrMode &AM) + : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM) { + IgnoreProfitability = false; + } +public: + + /// Match - Find the maximal addressing mode that a load/store of V can fold, + /// give an access type of AccessTy. This returns a list of involved + /// instructions in AddrModeInsts. + static ExtAddrMode Match(Value *V, Type *AccessTy, + Instruction *MemoryInst, + SmallVectorImpl<Instruction*> &AddrModeInsts, + const TargetLowering &TLI) { + ExtAddrMode Result; + + bool Success = + AddressingModeMatcher(AddrModeInsts, TLI, AccessTy, + MemoryInst, Result).MatchAddr(V, 0); + (void)Success; assert(Success && "Couldn't select *anything*?"); + return Result; + } +private: + bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); + bool MatchAddr(Value *V, unsigned Depth); + bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth); + bool IsProfitableToFoldIntoAddressingMode(Instruction *I, + ExtAddrMode &AMBefore, + ExtAddrMode &AMAfter); + bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); +}; + +/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode. +/// Return true and update AddrMode if this addr mode is legal for the target, +/// false if not. +bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, + unsigned Depth) { + // If Scale is 1, then this is the same as adding ScaleReg to the addressing + // mode. Just process that directly. + if (Scale == 1) + return MatchAddr(ScaleReg, Depth); + + // If the scale is 0, it takes nothing to add this. + if (Scale == 0) + return true; + + // If we already have a scale of this value, we can add to it, otherwise, we + // need an available scale field. + if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg) + return false; + + ExtAddrMode TestAddrMode = AddrMode; + + // Add scale to turn X*4+X*3 -> X*7. This could also do things like + // [A+B + A*7] -> [B+A*8]. + TestAddrMode.Scale += Scale; + TestAddrMode.ScaledReg = ScaleReg; + + // If the new address isn't legal, bail out. + if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) + return false; + + // It was legal, so commit it. + AddrMode = TestAddrMode; + + // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now + // to see if ScaleReg is actually X+C. If so, we can turn this into adding + // X*Scale + C*Scale to addr mode. + ConstantInt *CI = 0; Value *AddLHS = 0; + if (isa<Instruction>(ScaleReg) && // not a constant expr. 
+ match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) { + TestAddrMode.ScaledReg = AddLHS; + TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale; + + // If this addressing mode is legal, commit it and remember that we folded + // this instruction. + if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) { + AddrModeInsts.push_back(cast<Instruction>(ScaleReg)); + AddrMode = TestAddrMode; + return true; + } + } + + // Otherwise, not (x+c)*scale, just return what we have. + return true; +} + +/// MightBeFoldableInst - This is a little filter, which returns true if an +/// addressing computation involving I might be folded into a load/store +/// accessing it. This doesn't need to be perfect, but needs to accept at least +/// the set of instructions that MatchOperationAddr can. +static bool MightBeFoldableInst(Instruction *I) { + switch (I->getOpcode()) { + case Instruction::BitCast: + // Don't touch identity bitcasts. + if (I->getType() == I->getOperand(0)->getType()) + return false; + return I->getType()->isPointerTy() || I->getType()->isIntegerTy(); + case Instruction::PtrToInt: + // PtrToInt is always a noop, as we know that the int type is pointer sized. + return true; + case Instruction::IntToPtr: + // We know the input is intptr_t, so this is foldable. + return true; + case Instruction::Add: + return true; + case Instruction::Mul: + case Instruction::Shl: + // Can only handle X*C and X << C. + return isa<ConstantInt>(I->getOperand(1)); + case Instruction::GetElementPtr: + return true; + default: + return false; + } +} + +/// MatchOperationAddr - Given an instruction or constant expr, see if we can +/// fold the operation into the addressing mode. If so, update the addressing +/// mode and return true, otherwise return false without modifying AddrMode. +bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, + unsigned Depth) { + // Avoid exponential behavior on extremely deep expression trees. + if (Depth >= 5) return false; + + switch (Opcode) { + case Instruction::PtrToInt: + // PtrToInt is always a noop, as we know that the int type is pointer sized. + return MatchAddr(AddrInst->getOperand(0), Depth); + case Instruction::IntToPtr: + // This inttoptr is a no-op if the integer type is pointer sized. + if (TLI.getValueType(AddrInst->getOperand(0)->getType()) == + TLI.getPointerTy()) + return MatchAddr(AddrInst->getOperand(0), Depth); + return false; + case Instruction::BitCast: + // BitCast is always a noop, and we can handle it as long as it is + // int->int or pointer->pointer (we don't want int<->fp or something). + if ((AddrInst->getOperand(0)->getType()->isPointerTy() || + AddrInst->getOperand(0)->getType()->isIntegerTy()) && + // Don't touch identity bitcasts. These were probably put here by LSR, + // and we don't want to mess around with them. Assume it knows what it + // is doing. + AddrInst->getOperand(0)->getType() != AddrInst->getType()) + return MatchAddr(AddrInst->getOperand(0), Depth); + return false; + case Instruction::Add: { + // Check to see if we can merge in the RHS then the LHS. If so, we win. + ExtAddrMode BackupAddrMode = AddrMode; + unsigned OldSize = AddrModeInsts.size(); + if (MatchAddr(AddrInst->getOperand(1), Depth+1) && + MatchAddr(AddrInst->getOperand(0), Depth+1)) + return true; + + // Restore the old addr mode info. + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + + // Otherwise this was over-aggressive. Try merging in the LHS then the RHS. 
+ if (MatchAddr(AddrInst->getOperand(0), Depth+1) && + MatchAddr(AddrInst->getOperand(1), Depth+1)) + return true; + + // Otherwise we definitely can't merge the ADD in. + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + break; + } + //case Instruction::Or: + // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD. + //break; + case Instruction::Mul: + case Instruction::Shl: { + // Can only handle X*C and X << C. + ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1)); + if (!RHS) return false; + int64_t Scale = RHS->getSExtValue(); + if (Opcode == Instruction::Shl) + Scale = 1LL << Scale; + + return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth); + } + case Instruction::GetElementPtr: { + // Scan the GEP. We check it if it contains constant offsets and at most + // one variable offset. + int VariableOperand = -1; + unsigned VariableScale = 0; + + int64_t ConstantOffset = 0; + const DataLayout *TD = TLI.getDataLayout(); + gep_type_iterator GTI = gep_type_begin(AddrInst); + for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { + const StructLayout *SL = TD->getStructLayout(STy); + unsigned Idx = + cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); + ConstantOffset += SL->getElementOffset(Idx); + } else { + uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType()); + if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) { + ConstantOffset += CI->getSExtValue()*TypeSize; + } else if (TypeSize) { // Scales of zero don't do anything. + // We only allow one variable index at the moment. + if (VariableOperand != -1) + return false; + + // Remember the variable index. + VariableOperand = i; + VariableScale = TypeSize; + } + } + } + + // A common case is for the GEP to only do a constant offset. In this case, + // just add it to the disp field and check validity. + if (VariableOperand == -1) { + AddrMode.BaseOffs += ConstantOffset; + if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){ + // Check to see if we can fold the base pointer in too. + if (MatchAddr(AddrInst->getOperand(0), Depth+1)) + return true; + } + AddrMode.BaseOffs -= ConstantOffset; + return false; + } + + // Save the valid addressing mode in case we can't match. + ExtAddrMode BackupAddrMode = AddrMode; + unsigned OldSize = AddrModeInsts.size(); + + // See if the scale and offset amount is valid for this target. + AddrMode.BaseOffs += ConstantOffset; + + // Match the base operand of the GEP. + if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) { + // If it couldn't be matched, just stuff the value in a register. + if (AddrMode.HasBaseReg) { + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + return false; + } + AddrMode.HasBaseReg = true; + AddrMode.BaseReg = AddrInst->getOperand(0); + } + + // Match the remaining variable portion of the GEP. + if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, + Depth)) { + // If it couldn't be matched, try stuffing the base into a register + // instead of matching it, and retrying the match of the scale. + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + if (AddrMode.HasBaseReg) + return false; + AddrMode.HasBaseReg = true; + AddrMode.BaseReg = AddrInst->getOperand(0); + AddrMode.BaseOffs += ConstantOffset; + if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), + VariableScale, Depth)) { + // If even that didn't work, bail. 
+ AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + return false; + } + } + + return true; + } + } + return false; +} + +/// MatchAddr - If we can, try to add the value of 'Addr' into the current +/// addressing mode. If Addr can't be added to AddrMode this returns false and +/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type +/// or intptr_t for the target. +/// +bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) { + // Fold in immediates if legal for the target. + AddrMode.BaseOffs += CI->getSExtValue(); + if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) + return true; + AddrMode.BaseOffs -= CI->getSExtValue(); + } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) { + // If this is a global variable, try to fold it into the addressing mode. + if (AddrMode.BaseGV == 0) { + AddrMode.BaseGV = GV; + if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) + return true; + AddrMode.BaseGV = 0; + } + } else if (Instruction *I = dyn_cast<Instruction>(Addr)) { + ExtAddrMode BackupAddrMode = AddrMode; + unsigned OldSize = AddrModeInsts.size(); + + // Check to see if it is possible to fold this operation. + if (MatchOperationAddr(I, I->getOpcode(), Depth)) { + // Okay, it's possible to fold this. Check to see if it is actually + // *profitable* to do so. We use a simple cost model to avoid increasing + // register pressure too much. + if (I->hasOneUse() || + IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) { + AddrModeInsts.push_back(I); + return true; + } + + // It isn't profitable to do this, roll back. + //cerr << "NOT FOLDING: " << *I; + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + } + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) { + if (MatchOperationAddr(CE, CE->getOpcode(), Depth)) + return true; + } else if (isa<ConstantPointerNull>(Addr)) { + // Null pointer gets folded without affecting the addressing mode. + return true; + } + + // Worse case, the target should support [reg] addressing modes. :) + if (!AddrMode.HasBaseReg) { + AddrMode.HasBaseReg = true; + AddrMode.BaseReg = Addr; + // Still check for legality in case the target supports [imm] but not [i+r]. + if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) + return true; + AddrMode.HasBaseReg = false; + AddrMode.BaseReg = 0; + } + + // If the base register is already taken, see if we can do [r+r]. + if (AddrMode.Scale == 0) { + AddrMode.Scale = 1; + AddrMode.ScaledReg = Addr; + if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) + return true; + AddrMode.Scale = 0; + AddrMode.ScaledReg = 0; + } + // Couldn't match. + return false; +} + +/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified +/// inline asm call are due to memory operands. If so, return true, otherwise +/// return false. +static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, + const TargetLowering &TLI) { + TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI)); + for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { + TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; + + // Compute the constraint code and ConstraintType to use. + TLI.ComputeConstraintToUse(OpInfo, SDValue()); + + // If this asm operand is our Value*, and if it isn't an indirect memory + // operand, we can't fold it! 
+ if (OpInfo.CallOperandVal == OpVal && + (OpInfo.ConstraintType != TargetLowering::C_Memory || + !OpInfo.isIndirect)) + return false; + } + + return true; +} + +/// FindAllMemoryUses - Recursively walk all the uses of I until we find a +/// memory use. If we find an obviously non-foldable instruction, return true. +/// Add the ultimately found memory instructions to MemoryUses. +static bool FindAllMemoryUses(Instruction *I, + SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses, + SmallPtrSet<Instruction*, 16> &ConsideredInsts, + const TargetLowering &TLI) { + // If we already considered this instruction, we're done. + if (!ConsideredInsts.insert(I)) + return false; + + // If this is an obviously unfoldable instruction, bail out. + if (!MightBeFoldableInst(I)) + return true; + + // Loop over all the uses, recursively processing them. + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) { + User *U = *UI; + + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo())); + continue; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + unsigned opNo = UI.getOperandNo(); + if (opNo == 0) return true; // Storing addr, not into addr. + MemoryUses.push_back(std::make_pair(SI, opNo)); + continue; + } + + if (CallInst *CI = dyn_cast<CallInst>(U)) { + InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); + if (!IA) return true; + + // If this is a memory operand, we're cool, otherwise bail out. + if (!IsOperandAMemoryOperand(CI, IA, I, TLI)) + return true; + continue; + } + + if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts, + TLI)) + return true; + } + + return false; +} + +/// ValueAlreadyLiveAtInst - Retrn true if Val is already known to be live at +/// the use site that we're folding it into. If so, there is no cost to +/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values +/// that we know are live at the instruction already. +bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, + Value *KnownLive2) { + // If Val is either of the known-live values, we know it is live! + if (Val == 0 || Val == KnownLive1 || Val == KnownLive2) + return true; + + // All values other than instructions and arguments (e.g. constants) are live. + if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true; + + // If Val is a constant sized alloca in the entry block, it is live, this is + // true because it is just a reference to the stack/frame pointer, which is + // live for the whole function. + if (AllocaInst *AI = dyn_cast<AllocaInst>(Val)) + if (AI->isStaticAlloca()) + return true; + + // Check to see if this value is already used in the memory instruction's + // block. If so, it's already live into the block at the very least, so we + // can reasonably fold it. + return Val->isUsedInBasicBlock(MemoryInst->getParent()); +} + +/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing +/// mode of the machine to fold the specified instruction into a load or store +/// that ultimately uses it. However, the specified instruction has multiple +/// uses. Given this, it may actually increase register pressure to fold it +/// into the load. For example, consider this code: +/// +/// X = ... +/// Y = X+1 +/// use(Y) -> nonload/store +/// Z = Y+1 +/// load Z +/// +/// In this case, Y has multiple uses, and can be folded into the load of Z +/// (yielding load [X+2]). 
However, doing this will cause both "X" and "X+1" to +/// be live at the use(Y) line. If we don't fold Y into load Z, we use one +/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the +/// number of computations either. +/// +/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If +/// X was live across 'load Z' for other reasons, we actually *would* want to +/// fold the addressing mode in the Z case. This would make Y die earlier. +bool AddressingModeMatcher:: +IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, + ExtAddrMode &AMAfter) { + if (IgnoreProfitability) return true; + + // AMBefore is the addressing mode before this instruction was folded into it, + // and AMAfter is the addressing mode after the instruction was folded. Get + // the set of registers referenced by AMAfter and subtract out those + // referenced by AMBefore: this is the set of values which folding in this + // address extends the lifetime of. + // + // Note that there are only two potential values being referenced here, + // BaseReg and ScaleReg (global addresses are always available, as are any + // folded immediates). + Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg; + + // If the BaseReg or ScaledReg was referenced by the previous addrmode, their + // lifetime wasn't extended by adding this instruction. + if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) + BaseReg = 0; + if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) + ScaledReg = 0; + + // If folding this instruction (and it's subexprs) didn't extend any live + // ranges, we're ok with it. + if (BaseReg == 0 && ScaledReg == 0) + return true; + + // If all uses of this instruction are ultimately load/store/inlineasm's, + // check to see if their addressing modes will include this instruction. If + // so, we can fold it into all uses, so it doesn't matter if it has multiple + // uses. + SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses; + SmallPtrSet<Instruction*, 16> ConsideredInsts; + if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI)) + return false; // Has a non-memory, non-foldable use! + + // Now that we know that all uses of this instruction are part of a chain of + // computation involving only operations that could theoretically be folded + // into a memory use, loop over each of these uses and see if they could + // *actually* fold the instruction. + SmallVector<Instruction*, 32> MatchedAddrModeInsts; + for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) { + Instruction *User = MemoryUses[i].first; + unsigned OpNo = MemoryUses[i].second; + + // Get the access type of this use. If the use isn't a pointer, we don't + // know what it accesses. + Value *Address = User->getOperand(OpNo); + if (!Address->getType()->isPointerTy()) + return false; + Type *AddressAccessTy = + cast<PointerType>(Address->getType())->getElementType(); + + // Do a match against the root of this address, ignoring profitability. This + // will tell us if the addressing mode for the memory operation will + // *actually* cover the shared instruction. + ExtAddrMode Result; + AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy, + MemoryInst, Result); + Matcher.IgnoreProfitability = true; + bool Success = Matcher.MatchAddr(Address, 0); + (void)Success; assert(Success && "Couldn't select *anything*?"); + + // If the match didn't cover I, then it won't be shared by it. 
+ if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(), + I) == MatchedAddrModeInsts.end()) + return false; + + MatchedAddrModeInsts.clear(); + } + + return true; +} + +} // end anonymous namespace + /// IsNonLocalValue - Return true if the specified values are defined in a /// different basic block than BB. static bool IsNonLocalValue(Value *V, BasicBlock *BB) { diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index 27efde53cd..d5a96eceb9 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -22,9 +22,9 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Constant.h" -#include "llvm/DataLayout.h" -#include "llvm/Instruction.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instruction.h" #include "llvm/Pass.h" #include "llvm/Support/InstIterator.h" #include "llvm/Target/TargetLibraryInfo.h" diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index b5a2a25ba0..995782e1bc 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -16,11 +16,13 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -97,12 +99,29 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) { Value *Incoming = P->getIncomingValue(i); if (isa<Constant>(Incoming)) continue; - Constant *C = LVI->getConstantOnEdge(P->getIncomingValue(i), - P->getIncomingBlock(i), - BB); - if (!C) continue; + Value *V = LVI->getConstantOnEdge(Incoming, P->getIncomingBlock(i), BB); - P->setIncomingValue(i, C); + // Look if the incoming value is a select with a constant but LVI tells us + // that the incoming value can never be that constant. In that case replace + // the incoming value with the other value of the select. This often allows + // us to remove the select later. 
+ if (!V) { + SelectInst *SI = dyn_cast<SelectInst>(Incoming); + if (!SI) continue; + + Constant *C = dyn_cast<Constant>(SI->getFalseValue()); + if (!C) continue; + + if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C, + P->getIncomingBlock(i), BB) != + LazyValueInfo::False) + continue; + + DEBUG(dbgs() << "CVP: Threading PHI over " << *SI << '\n'); + V = SI->getTrueValue(); + } + + P->setIncomingValue(i, V); Changed = true; } diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index f260331c6d..e8a090af40 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -19,7 +19,7 @@ #define DEBUG_TYPE "dce" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Instruction.h" +#include "llvm/IR/Instruction.h" #include "llvm/Pass.h" #include "llvm/Support/InstIterator.h" #include "llvm/Target/TargetLibraryInfo.h" diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 124892887c..57432c7d71 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -26,12 +26,12 @@ #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Constants.h" -#include "llvm/DataLayout.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetLibraryInfo.h" @@ -376,10 +376,10 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // Check to see if the later store is to the entire object (either a global, // an alloca, or a byval argument). If so, then it clearly overwrites any // other store to the same object. - const DataLayout &TD = *AA.getDataLayout(); + const DataLayout *TD = AA.getDataLayout(); - const Value *UO1 = GetUnderlyingObject(P1, &TD), - *UO2 = GetUnderlyingObject(P2, &TD); + const Value *UO1 = GetUnderlyingObject(P1, TD), + *UO2 = GetUnderlyingObject(P2, TD); // If we can't resolve the same pointers to the same object, then we can't // analyze them at all. 
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 6b622c73f0..3c08634bfe 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -19,8 +19,8 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/DataLayout.h" -#include "llvm/Instructions.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 1c540b240c..c04b447f1c 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -32,12 +32,12 @@ #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" -#include "llvm/DataLayout.h" -#include "llvm/GlobalVariable.h" -#include "llvm/IRBuilder.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Metadata.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -849,8 +849,8 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, return -1; int64_t StoreOffset = 0, LoadOffset = 0; - Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD); - Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD); + Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr,StoreOffset,&TD); + Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, &TD); if (StoreBase != LoadBase) return -1; @@ -945,7 +945,7 @@ static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, // then we should widen it! int64_t LoadOffs = 0; const Value *LoadBase = - GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD); + GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, &TD); unsigned LoadSize = TD.getTypeStoreSize(LoadTy); unsigned Size = MemoryDependenceAnalysis:: @@ -1526,10 +1526,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { BasicBlock *LoadBB = LI->getParent(); BasicBlock *TmpBB = LoadBB; - bool isSinglePred = false; bool allSingleSucc = true; while (TmpBB->getSinglePredecessor()) { - isSinglePred = true; TmpBB = TmpBB->getSinglePredecessor(); if (TmpBB == LoadBB) // Infinite (unreachable) loop. return false; @@ -1548,28 +1546,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { assert(TmpBB); LoadBB = TmpBB; - // FIXME: It is extremely unclear what this loop is doing, other than - // artificially restricting loadpre. - if (isSinglePred) { - bool isHot = false; - for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { - const AvailableValueInBlock &AV = ValuesPerBlock[i]; - if (AV.isSimpleValue()) - // "Hot" Instruction is in some loop (because it dominates its dep. - // instruction). - if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue())) - if (DT->dominates(LI, I)) { - isHot = true; - break; - } - } - - // We are interested only in "hot" instructions. We don't want to do any - // mis-optimizations here. - if (!isHot) - return false; - } - // Check to see how many predecessors have the loaded value fully // available. 
DenseMap<BasicBlock*, Value*> PredLoads; @@ -2371,8 +2347,8 @@ bool GVN::processBlock(BasicBlock *BB) { E = InstrsToErase.end(); I != E; ++I) { DEBUG(dbgs() << "GVN removed: " << **I << '\n'); if (MD) MD->removeInstruction(*I); - (*I)->eraseFromParent(); DEBUG(verifyRemoved(*I)); + (*I)->eraseFromParent(); } InstrsToErase.clear(); @@ -2389,7 +2365,7 @@ bool GVN::processBlock(BasicBlock *BB) { /// control flow patterns and attempts to perform simple PRE at the join point. bool GVN::performPRE(Function &F) { bool Changed = false; - DenseMap<BasicBlock*, Value*> predMap; + SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap; for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()), DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) { BasicBlock *CurrentBlock = *DI; @@ -2445,19 +2421,22 @@ bool GVN::performPRE(Function &F) { if (P == CurrentBlock) { NumWithout = 2; break; - } else if (!DT->dominates(&F.getEntryBlock(), P)) { + } else if (!DT->isReachableFromEntry(P)) { NumWithout = 2; break; } Value* predV = findLeader(P, ValNo); if (predV == 0) { + predMap.push_back(std::make_pair(static_cast<Value *>(0), P)); PREPred = P; ++NumWithout; } else if (predV == CurInst) { + /* CurInst dominates this predecessor. */ NumWithout = 2; + break; } else { - predMap[P] = predV; + predMap.push_back(std::make_pair(predV, P)); ++NumWith; } } @@ -2504,15 +2483,14 @@ bool GVN::performPRE(Function &F) { // the PRE predecessor. This is typically because of loads which // are not value numbered precisely. if (!success) { - delete PREInstr; DEBUG(verifyRemoved(PREInstr)); + delete PREInstr; continue; } PREInstr->insertBefore(PREPred->getTerminator()); PREInstr->setName(CurInst->getName() + ".pre"); PREInstr->setDebugLoc(CurInst->getDebugLoc()); - predMap[PREPred] = PREInstr; VN.add(PREInstr, ValNo); ++NumGVNPRE; @@ -2520,13 +2498,14 @@ bool GVN::performPRE(Function &F) { addToLeaderTable(ValNo, PREInstr, PREPred); // Create a PHI to make the value available in this block. 
- pred_iterator PB = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); - PHINode* Phi = PHINode::Create(CurInst->getType(), std::distance(PB, PE), + PHINode* Phi = PHINode::Create(CurInst->getType(), predMap.size(), CurInst->getName() + ".pre-phi", CurrentBlock->begin()); - for (pred_iterator PI = PB; PI != PE; ++PI) { - BasicBlock *P = *PI; - Phi->addIncoming(predMap[P], P); + for (unsigned i = 0, e = predMap.size(); i != e; ++i) { + if (Value *V = predMap[i].first) + Phi->addIncoming(V, predMap[i].second); + else + Phi->addIncoming(PREInstr, PREPred); } VN.add(Phi, ValNo); @@ -2551,8 +2530,8 @@ bool GVN::performPRE(Function &F) { DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); if (MD) MD->removeInstruction(CurInst); - CurInst->eraseFromParent(); DEBUG(verifyRemoved(CurInst)); + CurInst->eraseFromParent(); Changed = true; } } diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp index 486a349c55..1601a8d646 100644 --- a/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/lib/Transforms/Scalar/GlobalMerge.cpp @@ -54,15 +54,15 @@ #define DEBUG_TYPE "global-merge" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Attributes.h" -#include "llvm/Constants.h" -#include "llvm/DataLayout.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Module.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -76,7 +76,7 @@ namespace { const TargetLowering *TLI; bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, - Module &M, bool isConst) const; + Module &M, bool isConst, unsigned AddrSpace) const; public: static char ID; // Pass identification, replacement for typeid. 
@@ -118,7 +118,7 @@ INITIALIZE_PASS(GlobalMerge, "global-merge", bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, - Module &M, bool isConst) const { + Module &M, bool isConst, unsigned AddrSpace) const { const DataLayout *TD = TLI->getDataLayout(); // FIXME: Infer the maximum possible offset depending on the actual users @@ -150,7 +150,9 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Constant *MergedInit = ConstantStruct::get(MergedTy, Inits); GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst, GlobalValue::InternalLinkage, - MergedInit, "_MergedGlobals"); + MergedInit, "_MergedGlobals", + 0, GlobalVariable::NotThreadLocal, + AddrSpace); for (size_t k = i; k < j; ++k) { Constant *Idx[2] = { ConstantInt::get(Int32Ty, 0), @@ -169,7 +171,8 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, bool GlobalMerge::doInitialization(Module &M) { - SmallVector<GlobalVariable*, 16> Globals, ConstGlobals, BSSGlobals; + DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals, + BSSGlobals; const DataLayout *TD = TLI->getDataLayout(); unsigned MaxOffset = TLI->getMaximalGlobalOffset(); bool Changed = false; @@ -181,6 +184,11 @@ bool GlobalMerge::doInitialization(Module &M) { if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection()) continue; + PointerType *PT = dyn_cast<PointerType>(I->getType()); + assert(PT && "Global variable is not a pointer!"); + + unsigned AddressSpace = PT->getAddressSpace(); + // Ignore fancy-aligned globals for now. unsigned Alignment = TD->getPreferredAlignment(I); Type *Ty = I->getType()->getElementType(); @@ -195,18 +203,23 @@ bool GlobalMerge::doInitialization(Module &M) { if (TD->getTypeAllocSize(Ty) < MaxOffset) { if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine()) .isBSSLocal()) - BSSGlobals.push_back(I); + BSSGlobals[AddressSpace].push_back(I); else if (I->isConstant()) - ConstGlobals.push_back(I); + ConstGlobals[AddressSpace].push_back(I); else - Globals.push_back(I); + Globals[AddressSpace].push_back(I); } } - if (Globals.size() > 1) - Changed |= doMerge(Globals, M, false); - if (BSSGlobals.size() > 1) - Changed |= doMerge(BSSGlobals, M, false); + for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator + I = Globals.begin(), E = Globals.end(); I != E; ++I) + if (I->second.size() > 1) + Changed |= doMerge(I->second, M, false, I->first); + + for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator + I = BSSGlobals.begin(), E = BSSGlobals.end(); I != E; ++I) + if (I->second.size() > 1) + Changed |= doMerge(I->second, M, false, I->first); // FIXME: This currently breaks the EH processing due to way how the // typeinfo detection works. 
We might want to detect the TIs and ignore diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 29f5a10e09..97fff7e782 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -33,12 +33,13 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/BasicBlock.h" -#include "llvm/Constants.h" -#include "llvm/DataLayout.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Type.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -47,7 +48,6 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" -#include "llvm/Type.h" using namespace llvm; STATISTIC(NumWidened , "Number of indvars widened"); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 4a4cd705e2..b61c5ba56e 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -23,9 +23,9 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/Loads.h" -#include "llvm/DataLayout.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -249,7 +249,11 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB, // as having cost of 2 total, and if they are a vector intrinsic, we model // them as having cost 1. if (const CallInst *CI = dyn_cast<CallInst>(I)) { - if (!isa<IntrinsicInst>(CI)) + if (CI->hasFnAttr(Attribute::NoDuplicate)) + // Blocks with NoDuplicate are modelled as having infinite cost, so they + // are never duplicated. + return ~0U; + else if (!isa<IntrinsicInst>(CI)) Size += 3; else if (!CI->getType()->isVectorTy()) Size += 1; diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 7ef1d34d3f..f94cd2a073 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -40,12 +40,13 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Constants.h" -#include "llvm/DataLayout.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -439,13 +440,12 @@ bool LICM::canSinkOrHoistInst(Instruction &I) { } // Only these instructions are hoistable/sinkable. 
- bool HoistableKind = (isa<BinaryOperator>(I) || isa<CastInst>(I) || - isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || - isa<CmpInst>(I) || isa<InsertElementInst>(I) || - isa<ExtractElementInst>(I) || - isa<ShuffleVectorInst>(I)); - if (!HoistableKind) - return false; + if (!isa<BinaryOperator>(I) && !isa<CastInst>(I) && !isa<SelectInst>(I) && + !isa<GetElementPtrInst>(I) && !isa<CmpInst>(I) && + !isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) && + !isa<ShuffleVectorInst>(I) && !isa<ExtractValueInst>(I) && + !isa<InsertValueInst>(I)) + return false; return isSafeToExecuteUnconditionally(I); } @@ -665,16 +665,18 @@ namespace { AliasSetTracker &AST; DebugLoc DL; int Alignment; + MDNode *TBAATag; public: LoopPromoter(Value *SP, const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, SmallPtrSet<Value*, 4> &PMA, SmallVectorImpl<BasicBlock*> &LEB, SmallVectorImpl<Instruction*> &LIP, - AliasSetTracker &ast, DebugLoc dl, int alignment) + AliasSetTracker &ast, DebugLoc dl, int alignment, + MDNode *TBAATag) : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP), - AST(ast), DL(dl), Alignment(alignment) {} + AST(ast), DL(dl), Alignment(alignment), TBAATag(TBAATag) {} virtual bool isInstInList(Instruction *I, const SmallVectorImpl<Instruction*> &) const { @@ -698,6 +700,7 @@ namespace { StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos); NewSI->setAlignment(Alignment); NewSI->setDebugLoc(DL); + if (TBAATag) NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag); } } @@ -751,10 +754,11 @@ void LICM::PromoteAliasSet(AliasSet &AS, // We start with an alignment of one and try to find instructions that allow // us to prove better alignment. unsigned Alignment = 1; + MDNode *TBAATag = 0; // Check that all of the pointers in the alias set have the same type. We // cannot (yet) promote a memory location that is loaded and stored in - // different sizes. + // different sizes. While we are at it, collect alignment and TBAA info. for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) { Value *ASIV = ASI->getValue(); PointerMustAliases.insert(ASIV); @@ -796,8 +800,7 @@ void LICM::PromoteAliasSet(AliasSet &AS, // instruction will be executed, update the alignment. // Larger is better, with the exception of 0 being the best alignment. unsigned InstAlignment = store->getAlignment(); - if ((InstAlignment > Alignment || InstAlignment == 0) - && (Alignment != 0)) + if ((InstAlignment > Alignment || InstAlignment == 0) && Alignment != 0) if (isGuaranteedToExecute(*Use)) { GuaranteedToExecute = true; Alignment = InstAlignment; @@ -809,6 +812,15 @@ void LICM::PromoteAliasSet(AliasSet &AS, } else return; // Not a load or store. + // Merge the TBAA tags. + if (LoopUses.empty()) { + // On the first load/store, just take its TBAA tag. + TBAATag = Use->getMetadata(LLVMContext::MD_tbaa); + } else if (TBAATag) { + TBAATag = MDNode::getMostGenericTBAA(TBAATag, + Use->getMetadata(LLVMContext::MD_tbaa)); + } + LoopUses.push_back(Use); } } @@ -841,7 +853,7 @@ void LICM::PromoteAliasSet(AliasSet &AS, SmallVector<PHINode*, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, - InsertPts, *CurAST, DL, Alignment); + InsertPts, *CurAST, DL, Alignment, TBAATag); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. 
@@ -850,6 +862,7 @@ void LICM::PromoteAliasSet(AliasSet &AS, Preheader->getTerminator()); PreheaderLoad->setAlignment(Alignment); PreheaderLoad->setDebugLoc(DL); + if (TBAATag) PreheaderLoad->setMetadata(LLVMContext::MD_tbaa, TBAATag); SSA.AddAvailableValue(Preheader, PreheaderLoad); // Rewrite all the loads in the loop and remember all the definitions from diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 7807e9bb4f..8258719a02 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -48,15 +48,15 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/DataLayout.h" -#include "llvm/IRBuilder.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Module.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/TargetTransformInfo.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -135,12 +135,12 @@ namespace { DominatorTree *DT; ScalarEvolution *SE; TargetLibraryInfo *TLI; - const ScalarTargetTransformInfo *STTI; + const TargetTransformInfo *TTI; public: static char ID; explicit LoopIdiomRecognize() : LoopPass(ID) { initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); - TD = 0; DT = 0; SE = 0; TLI = 0; STTI = 0; + TD = 0; DT = 0; SE = 0; TLI = 0; TTI = 0; } bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -177,6 +177,7 @@ namespace { AU.addPreserved<DominatorTree>(); AU.addRequired<DominatorTree>(); AU.addRequired<TargetLibraryInfo>(); + AU.addRequired<TargetTransformInfo>(); } const DataLayout *getDataLayout() { @@ -195,12 +196,8 @@ namespace { return TLI ? TLI : (TLI = &getAnalysis<TargetLibraryInfo>()); } - const ScalarTargetTransformInfo *getScalarTargetTransformInfo() { - if (!STTI) { - TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>(); - if (TTI) STTI = TTI->getScalarTargetTransformInfo(); - } - return STTI; + const TargetTransformInfo *getTargetTransformInfo() { + return TTI ? TTI : (TTI = &getAnalysis<TargetTransformInfo>()); } Loop *getLoop() const { return CurLoop; } @@ -221,6 +218,7 @@ INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", false, false) @@ -312,8 +310,8 @@ NclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR): } bool NclPopcountRecognize::preliminaryScreen() { - const ScalarTargetTransformInfo *STTI = LIR.getScalarTargetTransformInfo(); - if (STTI->getPopcntHwSupport(32) != ScalarTargetTransformInfo::Fast) + const TargetTransformInfo *TTI = LIR.getTargetTransformInfo(); + if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware) return false; // Counting population are usually conducted by few arithmetic instrutions. 
@@ -409,7 +407,7 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst, // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)" { - if (DefX2->getOpcode() != Instruction::And) + if (!DefX2 || DefX2->getOpcode() != Instruction::And) return false; BinaryOperator *SubOneOp; @@ -631,7 +629,7 @@ CallInst *NclPopcountRecognize::createPopcntIntrinsic(IRBuilderTy &IRBuilder, /// call, and return true; otherwise, return false. bool NclPopcountRecognize::recognize() { - if (!LIR.getScalarTargetTransformInfo()) + if (!LIR.getTargetTransformInfo()) return false; LIR.getScalarEvolution(); @@ -669,12 +667,14 @@ bool LoopIdiomRecognize::runOnCountableLoop() { if (!getDataLayout()) return false; - getDominatorTree(); + // set DT + (void)getDominatorTree(); LoopInfo &LI = getAnalysis<LoopInfo>(); TLI = &getAnalysis<TargetLibraryInfo>(); - getTargetLibraryInfo(); + // set TLI + (void)getTargetLibraryInfo(); SmallVector<BasicBlock*, 8> ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 10ba22434a..a23860aad8 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -14,12 +14,13 @@ #define DEBUG_TYPE "loop-instsimplify" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/DataLayout.h" -#include "llvm/Instructions.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 249baf5164..e98ae953e5 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -18,9 +18,10 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Function.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -51,6 +52,7 @@ namespace { AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); AU.addPreserved<ScalarEvolution>(); + AU.addRequired<TargetTransformInfo>(); } bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -59,11 +61,13 @@ namespace { private: LoopInfo *LI; + const TargetTransformInfo *TTI; }; } char LoopRotate::ID = 0; INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) @@ -75,6 +79,7 @@ Pass *llvm::createLoopRotatePass() { return new LoopRotate(); } /// the loop is rotated at least once. bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) { LI = &getAnalysis<LoopInfo>(); + TTI = &getAnalysis<TargetTransformInfo>(); // Simplify the loop latch before attempting to rotate the header // upward. 
Rotation may not be needed if the loop tail can be folded into the @@ -274,10 +279,16 @@ bool LoopRotate::rotateLoop(Loop *L) { if (OrigLatch == 0 || L->isLoopExiting(OrigLatch)) return false; - // Check size of original header and reject loop if it is very big. + // Check size of original header and reject loop if it is very big or we can't + // duplicate blocks inside it. { CodeMetrics Metrics; - Metrics.analyzeBasicBlock(OrigHeader); + Metrics.analyzeBasicBlock(OrigHeader, *TTI); + if (Metrics.notDuplicatable) { + DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non duplicatable" + << " instructions: "; L->dump()); + return false; + } if (Metrics.NumInsts > MAX_HEADER_SIZE) return false; } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index d571ba3fe0..4e4cb86464 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -37,8 +37,8 @@ // // TODO: Handle multiple loops at a time. // -// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr -// instead of a GlobalValue? +// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead +// of a GlobalValue? // // TODO: When truncation is free, truncate ICmp users' operands to make it a // smaller encoding (on x86 at least). @@ -58,21 +58,21 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" -#include "llvm/AddressingMode.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Assembly/Writer.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> @@ -224,16 +224,24 @@ namespace { /// computing satisfying a use. It may include broken-out immediates and scaled /// registers. struct Formula { - /// AM - This is used to represent complex addressing, as well as other kinds - /// of interesting uses. - AddrMode AM; + /// Global base address used for complex addressing. + GlobalValue *BaseGV; + + /// Base offset for complex addressing. + int64_t BaseOffset; + + /// Whether any complex addressing has a base register. + bool HasBaseReg; + + /// The scale of any complex addressing. + int64_t Scale; /// BaseRegs - The list of "base" registers for this use. When this is - /// non-empty, AM.HasBaseReg should be set to true. - SmallVector<const SCEV *, 2> BaseRegs; + /// non-empty, + SmallVector<const SCEV *, 4> BaseRegs; /// ScaledReg - The 'scaled' register for this use. This should be non-null - /// when AM.Scale is not zero. + /// when Scale is not zero. const SCEV *ScaledReg; /// UnfoldedOffset - An additional constant offset which added near the @@ -241,7 +249,9 @@ struct Formula { /// live in an add immediate field rather than a register. 
int64_t UnfoldedOffset; - Formula() : ScaledReg(0), UnfoldedOffset(0) {} + Formula() + : BaseGV(0), BaseOffset(0), HasBaseReg(false), Scale(0), ScaledReg(0), + UnfoldedOffset(0) {} void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); @@ -327,13 +337,13 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) { const SCEV *Sum = SE.getAddExpr(Good); if (!Sum->isZero()) BaseRegs.push_back(Sum); - AM.HasBaseReg = true; + HasBaseReg = true; } if (!Bad.empty()) { const SCEV *Sum = SE.getAddExpr(Bad); if (!Sum->isZero()) BaseRegs.push_back(Sum); - AM.HasBaseReg = true; + HasBaseReg = true; } } @@ -349,7 +359,7 @@ unsigned Formula::getNumRegs() const { Type *Formula::getType() const { return !BaseRegs.empty() ? BaseRegs.front()->getType() : ScaledReg ? ScaledReg->getType() : - AM.BaseGV ? AM.BaseGV->getType() : + BaseGV ? BaseGV->getType() : 0; } @@ -382,29 +392,29 @@ bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx, void Formula::print(raw_ostream &OS) const { bool First = true; - if (AM.BaseGV) { + if (BaseGV) { if (!First) OS << " + "; else First = false; - WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false); + WriteAsOperand(OS, BaseGV, /*PrintType=*/false); } - if (AM.BaseOffs != 0) { + if (BaseOffset != 0) { if (!First) OS << " + "; else First = false; - OS << AM.BaseOffs; + OS << BaseOffset; } for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(), E = BaseRegs.end(); I != E; ++I) { if (!First) OS << " + "; else First = false; OS << "reg(" << **I << ')'; } - if (AM.HasBaseReg && BaseRegs.empty()) { + if (HasBaseReg && BaseRegs.empty()) { if (!First) OS << " + "; else First = false; OS << "**error: HasBaseReg**"; - } else if (!AM.HasBaseReg && !BaseRegs.empty()) { + } else if (!HasBaseReg && !BaseRegs.empty()) { if (!First) OS << " + "; else First = false; OS << "**error: !HasBaseReg**"; } - if (AM.Scale != 0) { + if (Scale != 0) { if (!First) OS << " + "; else First = false; - OS << AM.Scale << "*reg("; + OS << Scale << "*reg("; if (ScaledReg) OS << *ScaledReg; else @@ -927,8 +937,8 @@ void Cost::RateFormula(const Formula &F, // Tally up the non-zero immediates. for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(), E = Offsets.end(); I != E; ++I) { - int64_t Offset = (uint64_t)*I + F.AM.BaseOffs; - if (F.AM.BaseGV) + int64_t Offset = (uint64_t)*I + F.BaseOffset; + if (F.BaseGV) ImmCost += 64; // Handle symbolic values conservatively. // TODO: This should probably be the pointer size. else if (Offset != 0) @@ -1078,19 +1088,19 @@ namespace { /// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding /// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*. 
struct UniquifierDenseMapInfo { - static SmallVector<const SCEV *, 2> getEmptyKey() { - SmallVector<const SCEV *, 2> V; + static SmallVector<const SCEV *, 4> getEmptyKey() { + SmallVector<const SCEV *, 4> V; V.push_back(reinterpret_cast<const SCEV *>(-1)); return V; } - static SmallVector<const SCEV *, 2> getTombstoneKey() { - SmallVector<const SCEV *, 2> V; + static SmallVector<const SCEV *, 4> getTombstoneKey() { + SmallVector<const SCEV *, 4> V; V.push_back(reinterpret_cast<const SCEV *>(-2)); return V; } - static unsigned getHashValue(const SmallVector<const SCEV *, 2> &V) { + static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) { unsigned Result = 0; for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(), E = V.end(); I != E; ++I) @@ -1098,8 +1108,8 @@ struct UniquifierDenseMapInfo { return Result; } - static bool isEqual(const SmallVector<const SCEV *, 2> &LHS, - const SmallVector<const SCEV *, 2> &RHS) { + static bool isEqual(const SmallVector<const SCEV *, 4> &LHS, + const SmallVector<const SCEV *, 4> &RHS) { return LHS == RHS; } }; @@ -1110,7 +1120,7 @@ struct UniquifierDenseMapInfo { /// the user itself, and information about how the use may be satisfied. /// TODO: Represent multiple users of the same expression in common? class LSRUse { - DenseSet<SmallVector<const SCEV *, 2>, UniquifierDenseMapInfo> Uniquifier; + DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier; public: /// KindType - An enum for a kind of use, indicating what types of @@ -1169,7 +1179,7 @@ public: /// HasFormula - Test whether this use as a formula which has the same /// registers as the given formula. bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { - SmallVector<const SCEV *, 2> Key = F.BaseRegs; + SmallVector<const SCEV *, 4> Key = F.BaseRegs; if (F.ScaledReg) Key.push_back(F.ScaledReg); // Unstable sort by host order ok, because this is only used for uniquifying. std::sort(Key.begin(), Key.end()); @@ -1179,7 +1189,7 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { /// InsertFormula - If the given formula has not yet been inserted, add it to /// the list, and return true. Return false otherwise. bool LSRUse::InsertFormula(const Formula &F) { - SmallVector<const SCEV *, 2> Key = F.BaseRegs; + SmallVector<const SCEV *, 4> Key = F.BaseRegs; if (F.ScaledReg) Key.push_back(F.ScaledReg); // Unstable sort by host order ok, because this is only used for uniquifying. std::sort(Key.begin(), Key.end()); @@ -1270,46 +1280,42 @@ void LSRUse::dump() const { /// isLegalUse - Test whether the use described by AM is "legal", meaning it can /// be completely folded into the user instruction at isel time. This includes /// address-mode folding and special icmp tricks. -static bool isLegalUse(const AddrMode &AM, - LSRUse::KindType Kind, Type *AccessTy, - const TargetLowering *TLI) { +static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind, + Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale) { switch (Kind) { case LSRUse::Address: - // If we have low-level target information, ask the target if it can - // completely fold this address. - if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy); + return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale); // Otherwise, just guess that reg+reg addressing is legal. 
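The hunk above is representative of the interface change running through this file: the TargetLowering AddrMode bundle is gone, and isLegalUse() now hands the addressing components (BaseGV, BaseOffset, HasBaseReg, Scale) individually to TargetTransformInfo. A minimal sketch of that kind of query, outside the patch and under a hypothetical helper name, assuming only the TTI hook visible in the hunk:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"

// Sketch only -- not part of the patch. canFoldAddress is a hypothetical
// helper that forwards the decomposed components to the TTI hook used above.
static bool canFoldAddress(const llvm::TargetTransformInfo &TTI,
                           llvm::Type *AccessTy, llvm::GlobalValue *BaseGV,
                           int64_t BaseOffset) {
  // Conservatively assume a base register and a unit scale.
  return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset,
                                   /*HasBaseReg=*/true, /*Scale=*/1);
}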
- return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1; + //return ; case LSRUse::ICmpZero: // There's not even a target hook for querying whether it would be legal to // fold a GV into an ICmp. - if (AM.BaseGV) + if (BaseGV) return false; // ICmp only has two operands; don't allow more than two non-trivial parts. - if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0) + if (Scale != 0 && HasBaseReg && BaseOffset != 0) return false; // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by // putting the scaled register in the other operand of the icmp. - if (AM.Scale != 0 && AM.Scale != -1) + if (Scale != 0 && Scale != -1) return false; // If we have low-level target information, ask the target if it can fold an // integer immediate on an icmp. - if (AM.BaseOffs != 0) { - if (!TLI) - return false; + if (BaseOffset != 0) { // We have one of: - // ICmpZero BaseReg + Offset => ICmp BaseReg, -Offset - // ICmpZero -1*ScaleReg + Offset => ICmp ScaleReg, Offset + // ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset + // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset // Offs is the ICmp immediate. - int64_t Offs = AM.BaseOffs; - if (AM.Scale == 0) - Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN. - return TLI->isLegalICmpImmediate(Offs); + if (Scale == 0) + // The cast does the right thing with INT64_MIN. + BaseOffset = -(uint64_t)BaseOffset; + return TTI.isLegalICmpImmediate(BaseOffset); } // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg @@ -1317,92 +1323,87 @@ static bool isLegalUse(const AddrMode &AM, case LSRUse::Basic: // Only handle single-register values. - return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0; + return !BaseGV && Scale == 0 && BaseOffset == 0; case LSRUse::Special: // Special case Basic to handle -1 scales. - return !AM.BaseGV && (AM.Scale == 0 || AM.Scale == -1) && AM.BaseOffs == 0; + return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0; } llvm_unreachable("Invalid LSRUse Kind!"); } -static bool isLegalUse(AddrMode AM, - int64_t MinOffset, int64_t MaxOffset, - LSRUse::KindType Kind, Type *AccessTy, - const TargetLowering *TLI) { +static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, + int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, + GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) { // Check for overflow. - if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) != + if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) != (MinOffset > 0)) return false; - AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset; - if (isLegalUse(AM, Kind, AccessTy, TLI)) { - AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset; - // Check for overflow. 
- if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) != - (MaxOffset > 0)) - return false; - AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset; - return isLegalUse(AM, Kind, AccessTy, TLI); - } - return false; + MinOffset = (uint64_t)BaseOffset + MinOffset; + if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) != + (MaxOffset > 0)) + return false; + MaxOffset = (uint64_t)BaseOffset + MaxOffset; + + return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg, + Scale) && + isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale); } -static bool isAlwaysFoldable(int64_t BaseOffs, - GlobalValue *BaseGV, - bool HasBaseReg, +static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, + int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, + const Formula &F) { + return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV, + F.BaseOffset, F.HasBaseReg, F.Scale); +} + +static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, Type *AccessTy, - const TargetLowering *TLI) { + GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg) { // Fast-path: zero is always foldable. - if (BaseOffs == 0 && !BaseGV) return true; + if (BaseOffset == 0 && !BaseGV) return true; // Conservatively, create an address with an immediate and a // base and a scale. - AddrMode AM; - AM.BaseOffs = BaseOffs; - AM.BaseGV = BaseGV; - AM.HasBaseReg = HasBaseReg; - AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1; + int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1; // Canonicalize a scale of 1 to a base register if the formula doesn't // already have a base register. - if (!AM.HasBaseReg && AM.Scale == 1) { - AM.Scale = 0; - AM.HasBaseReg = true; + if (!HasBaseReg && Scale == 1) { + Scale = 0; + HasBaseReg = true; } - return isLegalUse(AM, Kind, AccessTy, TLI); + return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale); } -static bool isAlwaysFoldable(const SCEV *S, - int64_t MinOffset, int64_t MaxOffset, - bool HasBaseReg, - LSRUse::KindType Kind, Type *AccessTy, - const TargetLowering *TLI, - ScalarEvolution &SE) { +static bool isAlwaysFoldable(const TargetTransformInfo &TTI, + ScalarEvolution &SE, int64_t MinOffset, + int64_t MaxOffset, LSRUse::KindType Kind, + Type *AccessTy, const SCEV *S, bool HasBaseReg) { // Fast-path: zero is always foldable. if (S->isZero()) return true; // Conservatively, create an address with an immediate and a // base and a scale. - int64_t BaseOffs = ExtractImmediate(S, SE); + int64_t BaseOffset = ExtractImmediate(S, SE); GlobalValue *BaseGV = ExtractSymbol(S, SE); // If there's anything else involved, it's not foldable. if (!S->isZero()) return false; // Fast-path: zero is always foldable. - if (BaseOffs == 0 && !BaseGV) return true; + if (BaseOffset == 0 && !BaseGV) return true; // Conservatively, create an address with an immediate and a // base and a scale. - AddrMode AM; - AM.BaseOffs = BaseOffs; - AM.BaseGV = BaseGV; - AM.HasBaseReg = HasBaseReg; - AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1; + int64_t Scale = Kind == LSRUse::ICmpZero ? 
-1 : 1; - return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI); + return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, + BaseOffset, HasBaseReg, Scale); } namespace { @@ -1502,7 +1503,7 @@ class LSRInstance { ScalarEvolution &SE; DominatorTree &DT; LoopInfo &LI; - const TargetLowering *const TLI; + const TargetTransformInfo &TTI; Loop *const L; bool Changed; @@ -1638,7 +1639,7 @@ class LSRInstance { Pass *P); public: - LSRInstance(const TargetLowering *tli, Loop *l, Pass *P); + LSRInstance(Loop *L, Pass *P); bool getChanged() const { return Changed; } @@ -1688,12 +1689,9 @@ void LSRInstance::OptimizeShadowIV() { } if (!DestTy) continue; - if (TLI) { - // If target does not support DestTy natively then do not apply - // this transformation. - EVT DVT = TLI->getValueType(DestTy); - if (!TLI->isTypeLegal(DVT)) continue; - } + // If target does not support DestTy natively then do not apply + // this transformation. + if (!TTI.isTypeLegal(DestTy)) continue; PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0)); if (!PH) continue; @@ -2015,18 +2013,17 @@ LSRInstance::OptimizeLoopTermCond() { if (C->getValue().getMinSignedBits() >= 64 || C->getValue().isMinSignedValue()) goto decline_post_inc; - // Without TLI, assume that any stride might be valid, and so any - // use might be shared. - if (!TLI) - goto decline_post_inc; // Check for possible scaled-address reuse. Type *AccessTy = getAccessType(UI->getUser()); - AddrMode AM; - AM.Scale = C->getSExtValue(); - if (TLI->isLegalAddressingMode(AM, AccessTy)) + int64_t Scale = C->getSExtValue(); + if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0, + /*BaseOffset=*/ 0, + /*HasBaseReg=*/ false, Scale)) goto decline_post_inc; - AM.Scale = -AM.Scale; - if (TLI->isLegalAddressingMode(AM, AccessTy)) + Scale = -Scale; + if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0, + /*BaseOffset=*/ 0, + /*HasBaseReg=*/ false, Scale)) goto decline_post_inc; } } @@ -2096,13 +2093,13 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, return false; // Conservatively assume HasBaseReg is true for now. if (NewOffset < LU.MinOffset) { - if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg, - Kind, AccessTy, TLI)) + if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0, + LU.MaxOffset - NewOffset, HasBaseReg)) return false; NewMinOffset = NewOffset; } else if (NewOffset > LU.MaxOffset) { - if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg, - Kind, AccessTy, TLI)) + if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0, + NewOffset - LU.MinOffset, HasBaseReg)) return false; NewMaxOffset = NewOffset; } @@ -2131,7 +2128,8 @@ LSRInstance::getUse(const SCEV *&Expr, int64_t Offset = ExtractImmediate(Expr, SE); // Basic uses can't accept any offset, for example. - if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) { + if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0, + Offset, /*HasBaseReg=*/ true)) { Expr = Copy; Offset = 0; } @@ -2199,10 +2197,10 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, // as OrigF. if (F.BaseRegs == OrigF.BaseRegs && F.ScaledReg == OrigF.ScaledReg && - F.AM.BaseGV == OrigF.AM.BaseGV && - F.AM.Scale == OrigF.AM.Scale && + F.BaseGV == OrigF.BaseGV && + F.Scale == OrigF.Scale && F.UnfoldedOffset == OrigF.UnfoldedOffset) { - if (F.AM.BaseOffs == 0) + if (F.BaseOffset == 0) return &LU; // This is the formula where all the registers and symbols matched; // there aren't going to be any others. 
Since we declined it, we @@ -2396,7 +2394,7 @@ bool IVChain::isProfitableIncrement(const SCEV *OperExpr, /// TODO: Consider IVInc free if it's already used in another chains. static bool isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users, - ScalarEvolution &SE, const TargetLowering *TLI) { + ScalarEvolution &SE, const TargetTransformInfo &TTI) { if (StressIVChain) return true; @@ -2539,6 +2537,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, // Add this IV user to the end of the chain. IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr)); } + IVChain &Chain = IVChainVec[ChainIdx]; SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers; // This chain's NearUsers become FarUsers. @@ -2556,8 +2555,19 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, for (Value::use_iterator UseIter = IVOper->use_begin(), UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) { Instruction *OtherUse = dyn_cast<Instruction>(*UseIter); - if (!OtherUse || OtherUse == UserInst) + if (!OtherUse) + continue; + // Uses in the chain will no longer be uses if the chain is formed. + // Include the head of the chain in this iteration (not Chain.begin()). + IVChain::const_iterator IncIter = Chain.Incs.begin(); + IVChain::const_iterator IncEnd = Chain.Incs.end(); + for( ; IncIter != IncEnd; ++IncIter) { + if (IncIter->UserInst == OtherUse) + break; + } + if (IncIter != IncEnd) continue; + if (SE.isSCEVable(OtherUse->getType()) && !isa<SCEVUnknown>(SE.getSCEV(OtherUse)) && IU.isIVUserOrOperand(OtherUse)) { @@ -2654,7 +2664,7 @@ void LSRInstance::CollectChains() { for (unsigned UsersIdx = 0, NChains = IVChainVec.size(); UsersIdx < NChains; ++UsersIdx) { if (!isProfitableChain(IVChainVec[UsersIdx], - ChainUsersVec[UsersIdx].FarUsers, SE, TLI)) + ChainUsersVec[UsersIdx].FarUsers, SE, TTI)) continue; // Preserve the chain at UsesIdx. if (ChainIdx != UsersIdx) @@ -2681,7 +2691,7 @@ void LSRInstance::FinalizeChain(IVChain &Chain) { /// Return true if the IVInc can be folded into an addressing mode. static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, - Value *Operand, const TargetLowering *TLI) { + Value *Operand, const TargetTransformInfo &TTI) { const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr); if (!IncConst || !isAddressUse(UserInst, Operand)) return false; @@ -2690,8 +2700,9 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, return false; int64_t IncOffset = IncConst->getValue()->getSExtValue(); - if (!isAlwaysFoldable(IncOffset, /*BaseGV=*/0, /*HaseBaseReg=*/false, - LSRUse::Address, getAccessType(UserInst), TLI)) + if (!isAlwaysFoldable(TTI, LSRUse::Address, + getAccessType(UserInst), /*BaseGV=*/ 0, + IncOffset, /*HaseBaseReg=*/ false)) return false; return true; @@ -2762,7 +2773,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, // If an IV increment can't be folded, use it as the next IV value. 
if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand, - TLI)) { + TTI)) { assert(IVTy == IVOper->getType() && "inconsistent IV increment type"); IVSrc = IVOper; LeftOverExpr = 0; @@ -2904,7 +2915,7 @@ LSRInstance::InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { Formula F; F.BaseRegs.push_back(S); - F.AM.HasBaseReg = true; + F.HasBaseReg = true; bool Inserted = InsertFormula(LU, LUIdx, F); assert(Inserted && "Supplemental formula already exists!"); (void)Inserted; } @@ -3106,9 +3117,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, // Don't pull a constant into a register if the constant could be folded // into an immediate field. - if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset, - Base.getNumRegs() > 1, - LU.Kind, LU.AccessTy, TLI, SE)) + if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, *J, Base.getNumRegs() > 1)) continue; // Collect all operands except *J. @@ -3120,9 +3130,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, // Don't leave just a constant behind in a register if the constant could // be folded into an immediate field. if (InnerAddOps.size() == 1 && - isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset, - Base.getNumRegs() > 1, - LU.Kind, LU.AccessTy, TLI, SE)) + isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1)) continue; const SCEV *InnerSum = SE.getAddExpr(InnerAddOps); @@ -3132,10 +3141,10 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, // Add the remaining pieces of the add back into the new formula. const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum); - if (TLI && InnerSumSC && + if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 && - TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset + - InnerSumSC->getValue()->getZExtValue())) { + TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + InnerSumSC->getValue()->getZExtValue())) { F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue(); F.BaseRegs.erase(F.BaseRegs.begin() + i); @@ -3144,9 +3153,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, // Add J as its own register, or an unfolded immediate. const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J); - if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && - TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset + - SC->getValue()->getZExtValue())) + if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && + TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + SC->getValue()->getZExtValue())) F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue(); else @@ -3195,7 +3204,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base) { // We can't add a symbolic offset if the address already contains one. 
- if (Base.AM.BaseGV) return; + if (Base.BaseGV) return; for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { const SCEV *G = Base.BaseRegs[i]; @@ -3203,9 +3212,8 @@ void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, if (G->isZero() || !GV) continue; Formula F = Base; - F.AM.BaseGV = GV; - if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI)) + F.BaseGV = GV; + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) continue; F.BaseRegs[i] = G; (void)InsertFormula(LU, LUIdx, F); @@ -3228,9 +3236,9 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(), E = Worklist.end(); I != E; ++I) { Formula F = Base; - F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I; - if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I, - LU.Kind, LU.AccessTy, TLI)) { + F.BaseOffset = (uint64_t)Base.BaseOffset - *I; + if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind, + LU.AccessTy, F)) { // Add the offset to the base register. const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G); // If it cancelled out, drop the base register, otherwise update it. @@ -3248,9 +3256,8 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, if (G->isZero() || Imm == 0) continue; Formula F = Base; - F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm; - if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI)) + F.BaseOffset = (uint64_t)F.BaseOffset + Imm; + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) continue; F.BaseRegs[i] = G; (void)InsertFormula(LU, LUIdx, F); @@ -3271,7 +3278,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, // Don't do this if there is more than one offset. if (LU.MinOffset != LU.MaxOffset) return; - assert(!Base.AM.BaseGV && "ICmpZero use is not legal!"); + assert(!Base.BaseGV && "ICmpZero use is not legal!"); // Check each interesting stride. for (SmallSetVector<int64_t, 8>::const_iterator @@ -3279,10 +3286,10 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, int64_t Factor = *I; // Check that the multiplication doesn't overflow. - if (Base.AM.BaseOffs == INT64_MIN && Factor == -1) + if (Base.BaseOffset == INT64_MIN && Factor == -1) continue; - int64_t NewBaseOffs = (uint64_t)Base.AM.BaseOffs * Factor; - if (NewBaseOffs / Factor != Base.AM.BaseOffs) + int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor; + if (NewBaseOffset / Factor != Base.BaseOffset) continue; // Check that multiplying with the use offset doesn't overflow. @@ -3294,14 +3301,14 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, continue; Formula F = Base; - F.AM.BaseOffs = NewBaseOffs; + F.BaseOffset = NewBaseOffset; // Check that this scale is legal. - if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI)) + if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F)) continue; // Compensate for the use having MinOffset built into it. - F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Offset - LU.MinOffset; + F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset; const SCEV *FactorS = SE.getConstant(IntTy, Factor); @@ -3342,23 +3349,23 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { if (!IntTy) return; // If this Formula already has a scaled register, we can't add another one. - if (Base.AM.Scale != 0) return; + if (Base.Scale != 0) return; // Check each interesting stride. 
for (SmallSetVector<int64_t, 8>::const_iterator I = Factors.begin(), E = Factors.end(); I != E; ++I) { int64_t Factor = *I; - Base.AM.Scale = Factor; - Base.AM.HasBaseReg = Base.BaseRegs.size() > 1; + Base.Scale = Factor; + Base.HasBaseReg = Base.BaseRegs.size() > 1; // Check whether this scale is going to be legal. - if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI)) { + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, + Base)) { // As a special-case, handle special out-of-loop Basic users specially. // TODO: Reconsider this special case. if (LU.Kind == LSRUse::Basic && - isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset, - LSRUse::Special, LU.AccessTy, TLI) && + isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special, + LU.AccessTy, Base) && LU.AllFixupsOutsideLoop) LU.Kind = LSRUse::Special; else @@ -3367,7 +3374,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { // For an ICmpZero, negating a solitary base register won't lead to // new solutions. if (LU.Kind == LSRUse::ICmpZero && - !Base.AM.HasBaseReg && Base.AM.BaseOffs == 0 && !Base.AM.BaseGV) + !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV) continue; // For each addrec base reg, apply the scale, if possible. for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) @@ -3391,11 +3398,8 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { /// GenerateTruncates - Generate reuse formulae from different IV types. void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { - // This requires TargetLowering to tell us which truncates are free. - if (!TLI) return; - // Don't bother truncating symbolic values. - if (Base.AM.BaseGV) return; + if (Base.BaseGV) return; // Determine the integer type for the base formula. Type *DstTy = Base.getType(); @@ -3405,7 +3409,7 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { for (SmallSetVector<Type *, 4>::const_iterator I = Types.begin(), E = Types.end(); I != E; ++I) { Type *SrcTy = *I; - if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) { + if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) { Formula F = Base; if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I); @@ -3552,16 +3556,15 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { const Formula &F = LU.Formulae[L]; // Use the immediate in the scaled register. if (F.ScaledReg == OrigReg) { - int64_t Offs = (uint64_t)F.AM.BaseOffs + - Imm * (uint64_t)F.AM.Scale; + int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale; // Don't create 50 + reg(-50). if (F.referencesReg(SE.getSCEV( - ConstantInt::get(IntTy, -(uint64_t)Offs)))) + ConstantInt::get(IntTy, -(uint64_t)Offset)))) continue; Formula NewF = F; - NewF.AM.BaseOffs = Offs; - if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI)) + NewF.BaseOffset = Offset; + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, + NewF)) continue; NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg); @@ -3570,9 +3573,9 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // immediate itself, then the formula isn't worthwhile. 
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) if (C->getValue()->isNegative() != - (NewF.AM.BaseOffs < 0) && - (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale)) - .ule(abs64(NewF.AM.BaseOffs))) + (NewF.BaseOffset < 0) && + (C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale)) + .ule(abs64(NewF.BaseOffset))) continue; // OK, looks good. @@ -3584,11 +3587,10 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { if (BaseReg != OrigReg) continue; Formula NewF = F; - NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm; - if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI)) { - if (!TLI || - !TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm)) + NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm; + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, + LU.Kind, LU.AccessTy, NewF)) { + if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm)) continue; NewF = F; NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm; @@ -3602,11 +3604,11 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end(); J != JE; ++J) if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J)) - if ((C->getValue()->getValue() + NewF.AM.BaseOffs).abs().slt( - abs64(NewF.AM.BaseOffs)) && + if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt( + abs64(NewF.BaseOffset)) && (C->getValue()->getValue() + - NewF.AM.BaseOffs).countTrailingZeros() >= - CountTrailingZeros_64(NewF.AM.BaseOffs)) + NewF.BaseOffset).countTrailingZeros() >= + CountTrailingZeros_64(NewF.BaseOffset)) goto skip_formula; // Ok, looks good. @@ -3667,7 +3669,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { // Collect the best formula for each unique set of shared registers. This // is reset for each use. - typedef DenseMap<SmallVector<const SCEV *, 2>, size_t, UniquifierDenseMapInfo> + typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo> BestFormulaeTy; BestFormulaeTy BestFormulae; @@ -3702,7 +3704,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { dbgs() << "\n"); } else { - SmallVector<const SCEV *, 2> Key; + SmallVector<const SCEV *, 4> Key; for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(), JE = F.BaseRegs.end(); J != JE; ++J) { const SCEV *Reg = *J; @@ -3804,7 +3806,7 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) { if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) { Formula NewF = F; - NewF.AM.BaseOffs += C->getValue()->getSExtValue(); + NewF.BaseOffset += C->getValue()->getSExtValue(); NewF.BaseRegs.erase(NewF.BaseRegs.begin() + (I - F.BaseRegs.begin())); if (LU.HasFormulaWithSameRegs(NewF)) { @@ -3817,9 +3819,9 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { } } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) { if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) - if (!F.AM.BaseGV) { + if (!F.BaseGV) { Formula NewF = F; - NewF.AM.BaseGV = GV; + NewF.BaseGV = GV; NewF.BaseRegs.erase(NewF.BaseRegs.begin() + (I - F.BaseRegs.begin())); if (LU.HasFormulaWithSameRegs(NewF)) { @@ -3848,84 +3850,83 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { /// for expressions like A, A+1, A+2, etc., allocate a single register for /// them. 
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { - if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { - DEBUG(dbgs() << "The search space is too complex.\n"); + if (EstimateSearchSpaceComplexity() < ComplexityLimit) + return; - DEBUG(dbgs() << "Narrowing the search space by assuming that uses " - "separated by a constant offset will use the same " - "registers.\n"); + DEBUG(dbgs() << "The search space is too complex.\n" + "Narrowing the search space by assuming that uses separated " + "by a constant offset will use the same registers.\n"); - // This is especially useful for unrolled loops. + // This is especially useful for unrolled loops. - for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { - LSRUse &LU = Uses[LUIdx]; - for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), - E = LU.Formulae.end(); I != E; ++I) { - const Formula &F = *I; - if (F.AM.BaseOffs != 0 && F.AM.Scale == 0) { - if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU)) { - if (reconcileNewOffset(*LUThatHas, F.AM.BaseOffs, - /*HasBaseReg=*/false, - LU.Kind, LU.AccessTy)) { - DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); - dbgs() << '\n'); - - LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop; - - // Update the relocs to reference the new use. - for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(), - E = Fixups.end(); I != E; ++I) { - LSRFixup &Fixup = *I; - if (Fixup.LUIdx == LUIdx) { - Fixup.LUIdx = LUThatHas - &Uses.front(); - Fixup.Offset += F.AM.BaseOffs; - // Add the new offset to LUThatHas' offset list. - if (LUThatHas->Offsets.back() != Fixup.Offset) { - LUThatHas->Offsets.push_back(Fixup.Offset); - if (Fixup.Offset > LUThatHas->MaxOffset) - LUThatHas->MaxOffset = Fixup.Offset; - if (Fixup.Offset < LUThatHas->MinOffset) - LUThatHas->MinOffset = Fixup.Offset; - } - DEBUG(dbgs() << "New fixup has offset " - << Fixup.Offset << '\n'); - } - if (Fixup.LUIdx == NumUses-1) - Fixup.LUIdx = LUIdx; - } + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), + E = LU.Formulae.end(); I != E; ++I) { + const Formula &F = *I; + if (F.BaseOffset == 0 || F.Scale != 0) + continue; - // Delete formulae from the new use which are no longer legal. - bool Any = false; - for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { - Formula &F = LUThatHas->Formulae[i]; - if (!isLegalUse(F.AM, - LUThatHas->MinOffset, LUThatHas->MaxOffset, - LUThatHas->Kind, LUThatHas->AccessTy, TLI)) { - DEBUG(dbgs() << " Deleting "; F.print(dbgs()); - dbgs() << '\n'); - LUThatHas->DeleteFormula(F); - --i; - --e; - Any = true; - } - } - if (Any) - LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses); + LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU); + if (!LUThatHas) + continue; - // Delete the old use. - DeleteUse(LU, LUIdx); - --LUIdx; - --NumUses; - break; - } + if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false, + LU.Kind, LU.AccessTy)) + continue; + + DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n'); + + LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop; + + // Update the relocs to reference the new use. 
+ for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(), + E = Fixups.end(); I != E; ++I) { + LSRFixup &Fixup = *I; + if (Fixup.LUIdx == LUIdx) { + Fixup.LUIdx = LUThatHas - &Uses.front(); + Fixup.Offset += F.BaseOffset; + // Add the new offset to LUThatHas' offset list. + if (LUThatHas->Offsets.back() != Fixup.Offset) { + LUThatHas->Offsets.push_back(Fixup.Offset); + if (Fixup.Offset > LUThatHas->MaxOffset) + LUThatHas->MaxOffset = Fixup.Offset; + if (Fixup.Offset < LUThatHas->MinOffset) + LUThatHas->MinOffset = Fixup.Offset; } + DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n'); } + if (Fixup.LUIdx == NumUses-1) + Fixup.LUIdx = LUIdx; } - } - DEBUG(dbgs() << "After pre-selection:\n"; - print_uses(dbgs())); + // Delete formulae from the new use which are no longer legal. + bool Any = false; + for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { + Formula &F = LUThatHas->Formulae[i]; + if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset, + LUThatHas->Kind, LUThatHas->AccessTy, F)) { + DEBUG(dbgs() << " Deleting "; F.print(dbgs()); + dbgs() << '\n'); + LUThatHas->DeleteFormula(F); + --i; + --e; + Any = true; + } + } + + if (Any) + LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses); + + // Delete the old use. + DeleteUse(LU, LUIdx); + --LUIdx; + --NumUses; + break; + } } + + DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); } /// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call @@ -4308,7 +4309,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // Expand the ScaledReg portion. Value *ICmpScaledV = 0; - if (F.AM.Scale != 0) { + if (F.Scale != 0) { const SCEV *ScaledS = F.ScaledReg; // If we're expanding for a post-inc user, make the post-inc adjustment. @@ -4321,7 +4322,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // An interesting way of "folding" with an icmp is to use a negated // scale, which we'll implement by inserting it into the other operand // of the icmp. - assert(F.AM.Scale == -1 && + assert(F.Scale == -1 && "The only scale supported by ICmpZero uses is -1!"); ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP); } else { @@ -4336,20 +4337,20 @@ Value *LSRInstance::Expand(const LSRFixup &LF, } ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP)); ScaledS = SE.getMulExpr(ScaledS, - SE.getConstant(ScaledS->getType(), F.AM.Scale)); + SE.getConstant(ScaledS->getType(), F.Scale)); Ops.push_back(ScaledS); } } // Expand the GV portion. - if (F.AM.BaseGV) { + if (F.BaseGV) { // Flush the operand list to suppress SCEVExpander hoisting. if (!Ops.empty()) { Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } - Ops.push_back(SE.getUnknown(F.AM.BaseGV)); + Ops.push_back(SE.getUnknown(F.BaseGV)); } // Flush the operand list to suppress SCEVExpander hoisting of both folded and @@ -4361,7 +4362,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, } // Expand the immediate portion. 
- int64_t Offset = (uint64_t)F.AM.BaseOffs + LF.Offset; + int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset; if (Offset != 0) { if (LU.Kind == LSRUse::ICmpZero) { // The other interesting way of "folding" with an ICmpZero is to use a @@ -4402,9 +4403,9 @@ Value *LSRInstance::Expand(const LSRFixup &LF, if (LU.Kind == LSRUse::ICmpZero) { ICmpInst *CI = cast<ICmpInst>(LF.UserInst); DeadInsts.push_back(CI->getOperand(1)); - assert(!F.AM.BaseGV && "ICmp does not support folding a global value and " + assert(!F.BaseGV && "ICmp does not support folding a global value and " "a scale at the same time!"); - if (F.AM.Scale == -1) { + if (F.Scale == -1) { if (ICmpScaledV->getType() != OpTy) { Instruction *Cast = CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false, @@ -4414,7 +4415,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, } CI->setOperand(1, ICmpScaledV); } else { - assert(F.AM.Scale == 0 && + assert(F.Scale == 0 && "ICmp does not support folding a global value and " "a scale at the same time!"); Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy), @@ -4589,13 +4590,11 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, Changed |= DeleteTriviallyDeadInstructions(DeadInsts); } -LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) - : IU(P->getAnalysis<IVUsers>()), - SE(P->getAnalysis<ScalarEvolution>()), - DT(P->getAnalysis<DominatorTree>()), - LI(P->getAnalysis<LoopInfo>()), - TLI(tli), L(l), Changed(false), IVIncInsertPos(0) { - +LSRInstance::LSRInstance(Loop *L, Pass *P) + : IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()), + DT(P->getAnalysis<DominatorTree>()), LI(P->getAnalysis<LoopInfo>()), + TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false), + IVIncInsertPos(0) { // If LoopSimplify form is not available, stay out of trouble. if (!L->isLoopSimplifyForm()) return; @@ -4678,14 +4677,14 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) #ifndef NDEBUG // Formulae should be legal. - for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), - E = Uses.end(); I != E; ++I) { - const LSRUse &LU = *I; - for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(), - JE = LU.Formulae.end(); J != JE; ++J) - assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI) && - "Illegal formula generated!"); + for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), E = Uses.end(); + I != E; ++I) { + const LSRUse &LU = *I; + for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(), + JE = LU.Formulae.end(); + J != JE; ++J) + assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, + *J) && "Illegal formula generated!"); }; #endif @@ -4757,13 +4756,9 @@ void LSRInstance::dump() const { namespace { class LoopStrengthReduce : public LoopPass { - /// TLI - Keep a pointer of a TargetLowering to consult for determining - /// transformation profitability. 
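As the hunks just below show, the pass itself no longer carries a TargetLowering pointer: createLoopStrengthReducePass() loses its parameter, and the pass declares a TargetTransformInfo requirement instead. A sketch of what client code looks like after the change (hypothetical helper name; assumes only the factory function shown below):

#include "llvm/PassManager.h"
#include "llvm/Transforms/Scalar.h"

// Sketch only -- not part of the patch. addLateLoopPasses is a hypothetical
// helper; note that no TargetLowering* is passed to LSR any more.
static void addLateLoopPasses(llvm::PassManager &PM) {
  PM.add(llvm::createLoopStrengthReducePass());
}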
- const TargetLowering *const TLI; - public: static char ID; // Pass ID, replacement for typeid - explicit LoopStrengthReduce(const TargetLowering *tli = 0); + LoopStrengthReduce(); private: bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -4775,6 +4770,7 @@ private: char LoopStrengthReduce::ID = 0; INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce", "Loop Strength Reduction", false, false) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_DEPENDENCY(IVUsers) @@ -4784,14 +4780,13 @@ INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce", "Loop Strength Reduction", false, false) -Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { - return new LoopStrengthReduce(TLI); +Pass *llvm::createLoopStrengthReducePass() { + return new LoopStrengthReduce(); } -LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli) - : LoopPass(ID), TLI(tli) { - initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry()); - } +LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) { + initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry()); +} void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { // We split critical edges, so we change the CFG. However, we do update @@ -4810,24 +4805,27 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(LoopSimplifyID); AU.addRequired<IVUsers>(); AU.addPreserved<IVUsers>(); + AU.addRequired<TargetTransformInfo>(); } bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { bool Changed = false; // Run the main LSR transformation. - Changed |= LSRInstance(TLI, L, this).getChanged(); + Changed |= LSRInstance(L, this).getChanged(); // Remove any extra phis created by processing inner loops. Changed |= DeleteDeadPHIs(L->getHeader()); - if (EnablePhiElim) { + if (EnablePhiElim && L->isLoopSimplifyForm()) { SmallVector<WeakVH, 16> DeadInsts; SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), "lsr"); #ifndef NDEBUG Rewriter.setDebugType(DEBUG_TYPE); #endif - unsigned numFolded = Rewriter. - replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, TLI); + unsigned numFolded = + Rewriter.replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), + DeadInsts, + &getAnalysis<TargetTransformInfo>()); if (numFolded) { Changed = true; DeleteTriviallyDeadInstructions(DeadInsts); diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 2b15528411..80d060b926 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -17,8 +17,9 @@ #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/DataLayout.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -90,6 +91,7 @@ namespace { AU.addPreservedID(LCSSAID); AU.addRequired<ScalarEvolution>(); AU.addPreserved<ScalarEvolution>(); + AU.addRequired<TargetTransformInfo>(); // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. // If loop unroll does not preserve dom info then LCSSA pass on next // loop will receive invalid dom info. 
@@ -101,6 +103,7 @@ namespace { char LoopUnroll::ID = 0; INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) @@ -113,12 +116,14 @@ Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) { /// ApproximateLoopSize - Approximate the size of the loop. static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, - const DataLayout *TD) { + bool &NotDuplicatable, + const TargetTransformInfo &TTI) { CodeMetrics Metrics; for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) - Metrics.analyzeBasicBlock(*I, TD); + Metrics.analyzeBasicBlock(*I, TTI); NumCalls = Metrics.NumInlineCandidates; + NotDuplicatable = Metrics.notDuplicatable; unsigned LoopSize = Metrics.NumInsts; @@ -133,6 +138,7 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { LoopInfo *LI = &getAnalysis<LoopInfo>(); ScalarEvolution *SE = &getAnalysis<ScalarEvolution>(); + const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>(); BasicBlock *Header = L->getHeader(); DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName() @@ -145,8 +151,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // not user specified. unsigned Threshold = CurrentThreshold; if (!UserThreshold && - Header->getParent()->getFnAttributes(). - hasAttribute(Attributes::OptimizeForSize)) + Header->getParent()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize)) Threshold = OptSizeUnrollThreshold; // Find trip count and trip multiple if count is not available @@ -179,10 +186,16 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // Enforce the threshold. if (Threshold != NoThreshold) { - const DataLayout *TD = getAnalysisIfAvailable<DataLayout>(); unsigned NumInlineCandidates; - unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, TD); + bool notDuplicatable; + unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, + notDuplicatable, TTI); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); + if (notDuplicatable) { + DEBUG(dbgs() << " Not unrolling loop which contains non duplicatable" + << " instructions.\n"); + return false; + } if (NumInlineCandidates != 0) { DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n"); return false; diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index d41da4a9a9..0e8199f2fd 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -37,10 +37,11 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -101,7 +102,7 @@ namespace { // Analyze loop. Check its size, calculate is it possible to unswitch // it. Returns true if we can unswitch this loop. 
- bool countLoop(const Loop* L); + bool countLoop(const Loop* L, const TargetTransformInfo &TTI); // Clean all data related to given loop. void forgetLoop(const Loop* L); @@ -170,6 +171,7 @@ namespace { AU.addPreservedID(LCSSAID); AU.addPreserved<DominatorTree>(); AU.addPreserved<ScalarEvolution>(); + AU.addRequired<TargetTransformInfo>(); } private: @@ -221,7 +223,7 @@ namespace { // Analyze loop. Check its size, calculate is it possible to unswitch // it. Returns true if we can unswitch this loop. -bool LUAnalysisCache::countLoop(const Loop* L) { +bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI) { std::pair<LoopPropsMapIt, bool> InsertRes = LoopsProperties.insert(std::make_pair(L, LoopProperties())); @@ -243,11 +245,18 @@ bool LUAnalysisCache::countLoop(const Loop* L) { for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) - Metrics.analyzeBasicBlock(*I); + Metrics.analyzeBasicBlock(*I, TTI); Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5); Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation); MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount; + + if (Metrics.notDuplicatable) { + DEBUG(dbgs() << "NOT unswitching loop %" + << L->getHeader()->getName() << ", contents cannot be " + << "duplicated!\n"); + return false; + } } if (!Props.CanBeUnswitchedCount) { @@ -327,6 +336,7 @@ void LUAnalysisCache::cloneData(const Loop* NewLoop, const Loop* OldLoop, char LoopUnswitch::ID = 0; INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops", false, false) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(LCSSA) @@ -417,7 +427,7 @@ bool LoopUnswitch::processCurrentLoop() { // Probably we reach the quota of branches for this loop. If so // stop unswitching. - if (!BranchesInfo.countLoop(currentLoop)) + if (!BranchesInfo.countLoop(currentLoop, getAnalysis<TargetTransformInfo>())) return false; // Loop over all of the basic blocks in the loop. If we find an interior @@ -639,7 +649,8 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { // Do not do non-trivial unswitch while optimizing for size. 
if (OptimizeForSize || - F->getFnAttributes().hasAttribute(Attributes::OptimizeForSize)) + F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize)) return false; UnswitchNontrivialCondition(LoopCond, Val, currentLoop); diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp index 7419a6543e..8ced4946c8 100644 --- a/lib/Transforms/Scalar/LowerAtomic.cpp +++ b/lib/Transforms/Scalar/LowerAtomic.cpp @@ -14,9 +14,9 @@ #define DEBUG_TYPE "loweratomic" #include "llvm/Transforms/Scalar.h" -#include "llvm/Function.h" -#include "llvm/IRBuilder.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" using namespace llvm; diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 26b6269f42..be0f0e8a25 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -20,11 +20,11 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/DataLayout.h" -#include "llvm/GlobalVariable.h" -#include "llvm/IRBuilder.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp deleted file mode 100644 index ce397658bf..0000000000 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ /dev/null @@ -1,4232 +0,0 @@ -//===- ObjCARC.cpp - ObjC ARC Optimization --------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines ObjC ARC optimizations. ARC stands for -// Automatic Reference Counting and is a system for managing reference counts -// for objects in Objective C. -// -// The optimizations performed include elimination of redundant, partially -// redundant, and inconsequential reference count operations, elimination of -// redundant weak pointer operations, pattern-matching and replacement of -// low-level operations into higher-level operations, and numerous minor -// simplifications. -// -// This file also defines a simple ARC-aware AliasAnalysis. -// -// WARNING: This file knows about certain library functions. It recognizes them -// by name, and hardwires knowledge of their semantics. -// -// WARNING: This file knows about how certain Objective-C library functions are -// used. Naive LLVM IR transformations which would otherwise be -// behavior-preserving may break these assumptions. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "objc-arc" -#include "llvm/ADT/DenseMap.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -// A handy option to enable/disable all optimizations in this file. 
-static cl::opt<bool> EnableARCOpts("enable-objc-arc-opts", cl::init(true)); - -//===----------------------------------------------------------------------===// -// Misc. Utilities -//===----------------------------------------------------------------------===// - -namespace { - /// MapVector - An associative container with fast insertion-order - /// (deterministic) iteration over its elements. Plus the special - /// blot operation. - template<class KeyT, class ValueT> - class MapVector { - /// Map - Map keys to indices in Vector. - typedef DenseMap<KeyT, size_t> MapTy; - MapTy Map; - - /// Vector - Keys and values. - typedef std::vector<std::pair<KeyT, ValueT> > VectorTy; - VectorTy Vector; - - public: - typedef typename VectorTy::iterator iterator; - typedef typename VectorTy::const_iterator const_iterator; - iterator begin() { return Vector.begin(); } - iterator end() { return Vector.end(); } - const_iterator begin() const { return Vector.begin(); } - const_iterator end() const { return Vector.end(); } - -#ifdef XDEBUG - ~MapVector() { - assert(Vector.size() >= Map.size()); // May differ due to blotting. - for (typename MapTy::const_iterator I = Map.begin(), E = Map.end(); - I != E; ++I) { - assert(I->second < Vector.size()); - assert(Vector[I->second].first == I->first); - } - for (typename VectorTy::const_iterator I = Vector.begin(), - E = Vector.end(); I != E; ++I) - assert(!I->first || - (Map.count(I->first) && - Map[I->first] == size_t(I - Vector.begin()))); - } -#endif - - ValueT &operator[](const KeyT &Arg) { - std::pair<typename MapTy::iterator, bool> Pair = - Map.insert(std::make_pair(Arg, size_t(0))); - if (Pair.second) { - size_t Num = Vector.size(); - Pair.first->second = Num; - Vector.push_back(std::make_pair(Arg, ValueT())); - return Vector[Num].second; - } - return Vector[Pair.first->second].second; - } - - std::pair<iterator, bool> - insert(const std::pair<KeyT, ValueT> &InsertPair) { - std::pair<typename MapTy::iterator, bool> Pair = - Map.insert(std::make_pair(InsertPair.first, size_t(0))); - if (Pair.second) { - size_t Num = Vector.size(); - Pair.first->second = Num; - Vector.push_back(InsertPair); - return std::make_pair(Vector.begin() + Num, true); - } - return std::make_pair(Vector.begin() + Pair.first->second, false); - } - - const_iterator find(const KeyT &Key) const { - typename MapTy::const_iterator It = Map.find(Key); - if (It == Map.end()) return Vector.end(); - return Vector.begin() + It->second; - } - - /// blot - This is similar to erase, but instead of removing the element - /// from the vector, it just zeros out the key in the vector. This leaves - /// iterators intact, but clients must be prepared for zeroed-out keys when - /// iterating. - void blot(const KeyT &Key) { - typename MapTy::iterator It = Map.find(Key); - if (It == Map.end()) return; - Vector[It->second].first = KeyT(); - Map.erase(It); - } - - void clear() { - Map.clear(); - Vector.clear(); - } - }; -} - -//===----------------------------------------------------------------------===// -// ARC Utilities. -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/StringSwitch.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Intrinsics.h" -#include "llvm/Module.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Transforms/Utils/Local.h" - -namespace { - /// InstructionClass - A simple classification for instructions. 
- enum InstructionClass { - IC_Retain, ///< objc_retain - IC_RetainRV, ///< objc_retainAutoreleasedReturnValue - IC_RetainBlock, ///< objc_retainBlock - IC_Release, ///< objc_release - IC_Autorelease, ///< objc_autorelease - IC_AutoreleaseRV, ///< objc_autoreleaseReturnValue - IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush - IC_AutoreleasepoolPop, ///< objc_autoreleasePoolPop - IC_NoopCast, ///< objc_retainedObject, etc. - IC_FusedRetainAutorelease, ///< objc_retainAutorelease - IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue - IC_LoadWeakRetained, ///< objc_loadWeakRetained (primitive) - IC_StoreWeak, ///< objc_storeWeak (primitive) - IC_InitWeak, ///< objc_initWeak (derived) - IC_LoadWeak, ///< objc_loadWeak (derived) - IC_MoveWeak, ///< objc_moveWeak (derived) - IC_CopyWeak, ///< objc_copyWeak (derived) - IC_DestroyWeak, ///< objc_destroyWeak (derived) - IC_StoreStrong, ///< objc_storeStrong (derived) - IC_CallOrUser, ///< could call objc_release and/or "use" pointers - IC_Call, ///< could call objc_release - IC_User, ///< could "use" a pointer - IC_None ///< anything else - }; -} - -/// IsPotentialUse - Test whether the given value is possible a -/// reference-counted pointer. -static bool IsPotentialUse(const Value *Op) { - // Pointers to static or stack storage are not reference-counted pointers. - if (isa<Constant>(Op) || isa<AllocaInst>(Op)) - return false; - // Special arguments are not reference-counted. - if (const Argument *Arg = dyn_cast<Argument>(Op)) - if (Arg->hasByValAttr() || - Arg->hasNestAttr() || - Arg->hasStructRetAttr()) - return false; - // Only consider values with pointer types. - // It seemes intuitive to exclude function pointer types as well, since - // functions are never reference-counted, however clang occasionally - // bitcasts reference-counted pointers to function-pointer type - // temporarily. - PointerType *Ty = dyn_cast<PointerType>(Op->getType()); - if (!Ty) - return false; - // Conservatively assume anything else is a potential use. - return true; -} - -/// GetCallSiteClass - Helper for GetInstructionClass. Determines what kind -/// of construct CS is. -static InstructionClass GetCallSiteClass(ImmutableCallSite CS) { - for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); - I != E; ++I) - if (IsPotentialUse(*I)) - return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser; - - return CS.onlyReadsMemory() ? IC_None : IC_Call; -} - -/// GetFunctionClass - Determine if F is one of the special known Functions. -/// If it isn't, return IC_CallOrUser. -static InstructionClass GetFunctionClass(const Function *F) { - Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - - // No arguments. - if (AI == AE) - return StringSwitch<InstructionClass>(F->getName()) - .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush) - .Default(IC_CallOrUser); - - // One argument. - const Argument *A0 = AI++; - if (AI == AE) - // Argument is a pointer. - if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) { - Type *ETy = PTy->getElementType(); - // Argument is i8*. 
- if (ETy->isIntegerTy(8)) - return StringSwitch<InstructionClass>(F->getName()) - .Case("objc_retain", IC_Retain) - .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV) - .Case("objc_retainBlock", IC_RetainBlock) - .Case("objc_release", IC_Release) - .Case("objc_autorelease", IC_Autorelease) - .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV) - .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop) - .Case("objc_retainedObject", IC_NoopCast) - .Case("objc_unretainedObject", IC_NoopCast) - .Case("objc_unretainedPointer", IC_NoopCast) - .Case("objc_retain_autorelease", IC_FusedRetainAutorelease) - .Case("objc_retainAutorelease", IC_FusedRetainAutorelease) - .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV) - .Default(IC_CallOrUser); - - // Argument is i8** - if (PointerType *Pte = dyn_cast<PointerType>(ETy)) - if (Pte->getElementType()->isIntegerTy(8)) - return StringSwitch<InstructionClass>(F->getName()) - .Case("objc_loadWeakRetained", IC_LoadWeakRetained) - .Case("objc_loadWeak", IC_LoadWeak) - .Case("objc_destroyWeak", IC_DestroyWeak) - .Default(IC_CallOrUser); - } - - // Two arguments, first is i8**. - const Argument *A1 = AI++; - if (AI == AE) - if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) - if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType())) - if (Pte->getElementType()->isIntegerTy(8)) - if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) { - Type *ETy1 = PTy1->getElementType(); - // Second argument is i8* - if (ETy1->isIntegerTy(8)) - return StringSwitch<InstructionClass>(F->getName()) - .Case("objc_storeWeak", IC_StoreWeak) - .Case("objc_initWeak", IC_InitWeak) - .Case("objc_storeStrong", IC_StoreStrong) - .Default(IC_CallOrUser); - // Second argument is i8**. - if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1)) - if (Pte1->getElementType()->isIntegerTy(8)) - return StringSwitch<InstructionClass>(F->getName()) - .Case("objc_moveWeak", IC_MoveWeak) - .Case("objc_copyWeak", IC_CopyWeak) - .Default(IC_CallOrUser); - } - - // Anything else. - return IC_CallOrUser; -} - -/// GetInstructionClass - Determine what kind of construct V is. -static InstructionClass GetInstructionClass(const Value *V) { - if (const Instruction *I = dyn_cast<Instruction>(V)) { - // Any instruction other than bitcast and gep with a pointer operand have a - // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer - // to a subsequent use, rather than using it themselves, in this sense. - // As a short cut, several other opcodes are known to have no pointer - // operands of interest. And ret is never followed by a release, so it's - // not interesting to examine. - switch (I->getOpcode()) { - case Instruction::Call: { - const CallInst *CI = cast<CallInst>(I); - // Check for calls to special functions. - if (const Function *F = CI->getCalledFunction()) { - InstructionClass Class = GetFunctionClass(F); - if (Class != IC_CallOrUser) - return Class; - - // None of the intrinsic functions do objc_release. For intrinsics, the - // only question is whether or not they may be users. 
- switch (F->getIntrinsicID()) { - case Intrinsic::returnaddress: case Intrinsic::frameaddress: - case Intrinsic::stacksave: case Intrinsic::stackrestore: - case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend: - case Intrinsic::objectsize: case Intrinsic::prefetch: - case Intrinsic::stackprotector: - case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64: - case Intrinsic::eh_typeid_for: case Intrinsic::eh_dwarf_cfa: - case Intrinsic::eh_sjlj_lsda: case Intrinsic::eh_sjlj_functioncontext: - case Intrinsic::init_trampoline: case Intrinsic::adjust_trampoline: - case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: - case Intrinsic::invariant_start: case Intrinsic::invariant_end: - // Don't let dbg info affect our results. - case Intrinsic::dbg_declare: case Intrinsic::dbg_value: - // Short cut: Some intrinsics obviously don't use ObjC pointers. - return IC_None; - default: - break; - } - } - return GetCallSiteClass(CI); - } - case Instruction::Invoke: - return GetCallSiteClass(cast<InvokeInst>(I)); - case Instruction::BitCast: - case Instruction::GetElementPtr: - case Instruction::Select: case Instruction::PHI: - case Instruction::Ret: case Instruction::Br: - case Instruction::Switch: case Instruction::IndirectBr: - case Instruction::Alloca: case Instruction::VAArg: - case Instruction::Add: case Instruction::FAdd: - case Instruction::Sub: case Instruction::FSub: - case Instruction::Mul: case Instruction::FMul: - case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv: - case Instruction::SRem: case Instruction::URem: case Instruction::FRem: - case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: - case Instruction::And: case Instruction::Or: case Instruction::Xor: - case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc: - case Instruction::IntToPtr: case Instruction::FCmp: - case Instruction::FPTrunc: case Instruction::FPExt: - case Instruction::FPToUI: case Instruction::FPToSI: - case Instruction::UIToFP: case Instruction::SIToFP: - case Instruction::InsertElement: case Instruction::ExtractElement: - case Instruction::ShuffleVector: - case Instruction::ExtractValue: - break; - case Instruction::ICmp: - // Comparing a pointer with null, or any other constant, isn't an - // interesting use, because we don't care what the pointer points to, or - // about the values of any other dynamic reference-counted pointers. - if (IsPotentialUse(I->getOperand(1))) - return IC_User; - break; - default: - // For anything else, check all the operands. - // Note that this includes both operands of a Store: while the first - // operand isn't actually being dereferenced, it is being stored to - // memory where we can no longer track who might read it and dereference - // it, so we have to consider it potentially used. - for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end(); - OI != OE; ++OI) - if (IsPotentialUse(*OI)) - return IC_User; - } - } - - // Otherwise, it's totally inert for ARC purposes. - return IC_None; -} - -/// GetBasicInstructionClass - Determine what kind of construct V is. This is -/// similar to GetInstructionClass except that it only detects objc runtine -/// calls. This allows it to be faster. -static InstructionClass GetBasicInstructionClass(const Value *V) { - if (const CallInst *CI = dyn_cast<CallInst>(V)) { - if (const Function *F = CI->getCalledFunction()) - return GetFunctionClass(F); - // Otherwise, be conservative. - return IC_CallOrUser; - } - - // Otherwise, be conservative. 
- return isa<InvokeInst>(V) ? IC_CallOrUser : IC_User; -} - -/// IsRetain - Test if the given class is objc_retain or -/// equivalent. -static bool IsRetain(InstructionClass Class) { - return Class == IC_Retain || - Class == IC_RetainRV; -} - -/// IsAutorelease - Test if the given class is objc_autorelease or -/// equivalent. -static bool IsAutorelease(InstructionClass Class) { - return Class == IC_Autorelease || - Class == IC_AutoreleaseRV; -} - -/// IsForwarding - Test if the given class represents instructions which return -/// their argument verbatim. -static bool IsForwarding(InstructionClass Class) { - // objc_retainBlock technically doesn't always return its argument - // verbatim, but it doesn't matter for our purposes here. - return Class == IC_Retain || - Class == IC_RetainRV || - Class == IC_Autorelease || - Class == IC_AutoreleaseRV || - Class == IC_RetainBlock || - Class == IC_NoopCast; -} - -/// IsNoopOnNull - Test if the given class represents instructions which do -/// nothing if passed a null pointer. -static bool IsNoopOnNull(InstructionClass Class) { - return Class == IC_Retain || - Class == IC_RetainRV || - Class == IC_Release || - Class == IC_Autorelease || - Class == IC_AutoreleaseRV || - Class == IC_RetainBlock; -} - -/// IsAlwaysTail - Test if the given class represents instructions which are -/// always safe to mark with the "tail" keyword. -static bool IsAlwaysTail(InstructionClass Class) { - // IC_RetainBlock may be given a stack argument. - return Class == IC_Retain || - Class == IC_RetainRV || - Class == IC_Autorelease || - Class == IC_AutoreleaseRV; -} - -/// IsNoThrow - Test if the given class represents instructions which are always -/// safe to mark with the nounwind attribute.. -static bool IsNoThrow(InstructionClass Class) { - // objc_retainBlock is not nounwind because it calls user copy constructors - // which could theoretically throw. - return Class == IC_Retain || - Class == IC_RetainRV || - Class == IC_Release || - Class == IC_Autorelease || - Class == IC_AutoreleaseRV || - Class == IC_AutoreleasepoolPush || - Class == IC_AutoreleasepoolPop; -} - -/// EraseInstruction - Erase the given instruction. Many ObjC calls return their -/// argument verbatim, so if it's such a call and the return value has users, -/// replace them with the argument value. -static void EraseInstruction(Instruction *CI) { - Value *OldArg = cast<CallInst>(CI)->getArgOperand(0); - - bool Unused = CI->use_empty(); - - if (!Unused) { - // Replace the return value with the argument. - assert(IsForwarding(GetBasicInstructionClass(CI)) && - "Can't delete non-forwarding instruction with users!"); - CI->replaceAllUsesWith(OldArg); - } - - CI->eraseFromParent(); - - if (Unused) - RecursivelyDeleteTriviallyDeadInstructions(OldArg); -} - -/// GetUnderlyingObjCPtr - This is a wrapper around getUnderlyingObject which -/// also knows how to look through objc_retain and objc_autorelease calls, which -/// we know to return their argument verbatim. -static const Value *GetUnderlyingObjCPtr(const Value *V) { - for (;;) { - V = GetUnderlyingObject(V); - if (!IsForwarding(GetBasicInstructionClass(V))) - break; - V = cast<CallInst>(V)->getArgOperand(0); - } - - return V; -} - -/// StripPointerCastsAndObjCCalls - This is a wrapper around -/// Value::stripPointerCasts which also knows how to look through objc_retain -/// and objc_autorelease calls, which we know to return their argument verbatim. 
-static const Value *StripPointerCastsAndObjCCalls(const Value *V) { - for (;;) { - V = V->stripPointerCasts(); - if (!IsForwarding(GetBasicInstructionClass(V))) - break; - V = cast<CallInst>(V)->getArgOperand(0); - } - return V; -} - -/// StripPointerCastsAndObjCCalls - This is a wrapper around -/// Value::stripPointerCasts which also knows how to look through objc_retain -/// and objc_autorelease calls, which we know to return their argument verbatim. -static Value *StripPointerCastsAndObjCCalls(Value *V) { - for (;;) { - V = V->stripPointerCasts(); - if (!IsForwarding(GetBasicInstructionClass(V))) - break; - V = cast<CallInst>(V)->getArgOperand(0); - } - return V; -} - -/// GetObjCArg - Assuming the given instruction is one of the special calls such -/// as objc_retain or objc_release, return the argument value, stripped of no-op -/// casts and forwarding calls. -static Value *GetObjCArg(Value *Inst) { - return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0)); -} - -/// IsObjCIdentifiedObject - This is similar to AliasAnalysis' -/// isObjCIdentifiedObject, except that it uses special knowledge of -/// ObjC conventions... -static bool IsObjCIdentifiedObject(const Value *V) { - // Assume that call results and arguments have their own "provenance". - // Constants (including GlobalVariables) and Allocas are never - // reference-counted. - if (isa<CallInst>(V) || isa<InvokeInst>(V) || - isa<Argument>(V) || isa<Constant>(V) || - isa<AllocaInst>(V)) - return true; - - if (const LoadInst *LI = dyn_cast<LoadInst>(V)) { - const Value *Pointer = - StripPointerCastsAndObjCCalls(LI->getPointerOperand()); - if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) { - // A constant pointer can't be pointing to an object on the heap. It may - // be reference-counted, but it won't be deleted. - if (GV->isConstant()) - return true; - StringRef Name = GV->getName(); - // These special variables are known to hold values which are not - // reference-counted pointers. - if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_") || - Name.startswith("\01L_OBJC_CLASSLIST_REFERENCES_") || - Name.startswith("\01L_OBJC_CLASSLIST_SUP_REFS_$_") || - Name.startswith("\01L_OBJC_METH_VAR_NAME_") || - Name.startswith("\01l_objc_msgSend_fixup_")) - return true; - } - } - - return false; -} - -/// FindSingleUseIdentifiedObject - This is similar to -/// StripPointerCastsAndObjCCalls but it stops as soon as it finds a value -/// with multiple uses. -static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { - if (Arg->hasOneUse()) { - if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg)) - return FindSingleUseIdentifiedObject(BC->getOperand(0)); - if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg)) - if (GEP->hasAllZeroIndices()) - return FindSingleUseIdentifiedObject(GEP->getPointerOperand()); - if (IsForwarding(GetBasicInstructionClass(Arg))) - return FindSingleUseIdentifiedObject( - cast<CallInst>(Arg)->getArgOperand(0)); - if (!IsObjCIdentifiedObject(Arg)) - return 0; - return Arg; - } - - // If we found an identifiable object but it has multiple uses, but they are - // trivial uses, we can still consider this to be a single-use value. 
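As a minimal sketch of how the stripping helpers above get used (not from the deleted file; Inst is assumed to be an arbitrary Instruction* under inspection), GetObjCArg recovers the pointer whose reference count a runtime call actually manipulates:

    // Given IR along the lines of:
    //   %0 = bitcast %T* %obj to i8*
    //   %1 = tail call i8* @objc_retain(i8* %0)
    // GetObjCArg on the retain strips the bitcast and yields %obj.
    if (CallInst *Call = dyn_cast<CallInst>(Inst))
      if (GetBasicInstructionClass(Call) == IC_Retain) {
        Value *Obj = GetObjCArg(Call);   // stripped of casts and forwarding calls
        (void)Obj;                       // ... reason about Obj's provenance here ...
      }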
- if (IsObjCIdentifiedObject(Arg)) { - for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); - UI != UE; ++UI) { - const User *U = *UI; - if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg) - return 0; - } - - return Arg; - } - - return 0; -} - -/// ModuleHasARC - Test if the given module looks interesting to run ARC -/// optimization on. -static bool ModuleHasARC(const Module &M) { - return - M.getNamedValue("objc_retain") || - M.getNamedValue("objc_release") || - M.getNamedValue("objc_autorelease") || - M.getNamedValue("objc_retainAutoreleasedReturnValue") || - M.getNamedValue("objc_retainBlock") || - M.getNamedValue("objc_autoreleaseReturnValue") || - M.getNamedValue("objc_autoreleasePoolPush") || - M.getNamedValue("objc_loadWeakRetained") || - M.getNamedValue("objc_loadWeak") || - M.getNamedValue("objc_destroyWeak") || - M.getNamedValue("objc_storeWeak") || - M.getNamedValue("objc_initWeak") || - M.getNamedValue("objc_moveWeak") || - M.getNamedValue("objc_copyWeak") || - M.getNamedValue("objc_retainedObject") || - M.getNamedValue("objc_unretainedObject") || - M.getNamedValue("objc_unretainedPointer"); -} - -/// DoesObjCBlockEscape - Test whether the given pointer, which is an -/// Objective C block pointer, does not "escape". This differs from regular -/// escape analysis in that a use as an argument to a call is not considered -/// an escape. -static bool DoesObjCBlockEscape(const Value *BlockPtr) { - // Walk the def-use chains. - SmallVector<const Value *, 4> Worklist; - Worklist.push_back(BlockPtr); - do { - const Value *V = Worklist.pop_back_val(); - for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); - UI != UE; ++UI) { - const User *UUser = *UI; - // Special - Use by a call (callee or argument) is not considered - // to be an escape. - switch (GetBasicInstructionClass(UUser)) { - case IC_StoreWeak: - case IC_InitWeak: - case IC_StoreStrong: - case IC_Autorelease: - case IC_AutoreleaseRV: - // These special functions make copies of their pointer arguments. - return true; - case IC_User: - case IC_None: - // Use by an instruction which copies the value is an escape if the - // result is an escape. - if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) || - isa<PHINode>(UUser) || isa<SelectInst>(UUser)) { - Worklist.push_back(UUser); - continue; - } - // Use by a load is not an escape. - if (isa<LoadInst>(UUser)) - continue; - // Use by a store is not an escape if the use is the address. - if (const StoreInst *SI = dyn_cast<StoreInst>(UUser)) - if (V != SI->getValueOperand()) - continue; - break; - default: - // Regular calls and other stuff are not considered escapes. - continue; - } - // Otherwise, conservatively assume an escape. - return true; - } - } while (!Worklist.empty()); - - // No escapes found. - return false; -} - -//===----------------------------------------------------------------------===// -// ARC AliasAnalysis. -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Pass.h" - -namespace { - /// ObjCARCAliasAnalysis - This is a simple alias analysis - /// implementation that uses knowledge of ARC constructs to answer queries. - /// - /// TODO: This class could be generalized to know about other ObjC-specific - /// tricks. Such as knowing that ivars in the non-fragile ABI are non-aliasing - /// even though their offsets are dynamic. 
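DoesObjCBlockEscape above (and isStoredObjCPointer further down) both follow the same def-use worklist idiom: seed with the root value, keep walking users that merely forward the pointer, and conservatively give up on anything else. A generic sketch of that idiom, with an illustrative escape policy and assuming the usual Value/Instruction headers from this era of the tree:

    static bool onlyForwardedAndLoaded(const Value *Root) {
      SmallVector<const Value *, 8> Worklist;
      SmallPtrSet<const Value *, 16> Visited;
      Worklist.push_back(Root);
      Visited.insert(Root);
      do {
        const Value *V = Worklist.pop_back_val();
        for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
             UI != UE; ++UI) {
          const User *U = *UI;
          // Users which only forward the pointer: walk their uses as well.
          if (isa<BitCastInst>(U) || isa<GetElementPtrInst>(U) ||
              isa<PHINode>(U) || isa<SelectInst>(U)) {
            if (Visited.insert(U))        // insert() returned bool in this era
              Worklist.push_back(U);
            continue;
          }
          // Loads read through the pointer without capturing it.
          if (isa<LoadInst>(U))
            continue;
          return false;                   // anything else: assume the worst
        }
      } while (!Worklist.empty());
      return true;
    }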
- class ObjCARCAliasAnalysis : public ImmutablePass, - public AliasAnalysis { - public: - static char ID; // Class identification, replacement for typeinfo - ObjCARCAliasAnalysis() : ImmutablePass(ID) { - initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry()); - } - - private: - virtual void initializePass() { - InitializeAliasAnalysis(this); - } - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - virtual void *getAdjustedAnalysisPointer(const void *PI) { - if (PI == &AliasAnalysis::ID) - return static_cast<AliasAnalysis *>(this); - return this; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual AliasResult alias(const Location &LocA, const Location &LocB); - virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal); - virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); - virtual ModRefBehavior getModRefBehavior(const Function *F); - virtual ModRefResult getModRefInfo(ImmutableCallSite CS, - const Location &Loc); - virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2); - }; -} // End of anonymous namespace - -// Register this pass... -char ObjCARCAliasAnalysis::ID = 0; -INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa", - "ObjC-ARC-Based Alias Analysis", false, true, false) - -ImmutablePass *llvm::createObjCARCAliasAnalysisPass() { - return new ObjCARCAliasAnalysis(); -} - -void -ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AliasAnalysis::getAnalysisUsage(AU); -} - -AliasAnalysis::AliasResult -ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) { - if (!EnableARCOpts) - return AliasAnalysis::alias(LocA, LocB); - - // First, strip off no-ops, including ObjC-specific no-ops, and try making a - // precise alias query. - const Value *SA = StripPointerCastsAndObjCCalls(LocA.Ptr); - const Value *SB = StripPointerCastsAndObjCCalls(LocB.Ptr); - AliasResult Result = - AliasAnalysis::alias(Location(SA, LocA.Size, LocA.TBAATag), - Location(SB, LocB.Size, LocB.TBAATag)); - if (Result != MayAlias) - return Result; - - // If that failed, climb to the underlying object, including climbing through - // ObjC-specific no-ops, and try making an imprecise alias query. - const Value *UA = GetUnderlyingObjCPtr(SA); - const Value *UB = GetUnderlyingObjCPtr(SB); - if (UA != SA || UB != SB) { - Result = AliasAnalysis::alias(Location(UA), Location(UB)); - // We can't use MustAlias or PartialAlias results here because - // GetUnderlyingObjCPtr may return an offsetted pointer value. - if (Result == NoAlias) - return NoAlias; - } - - // If that failed, fail. We don't need to chain here, since that's covered - // by the earlier precise query. - return MayAlias; -} - -bool -ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc, - bool OrLocal) { - if (!EnableARCOpts) - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); - - // First, strip off no-ops, including ObjC-specific no-ops, and try making - // a precise alias query. 
- const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr); - if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.TBAATag), - OrLocal)) - return true; - - // If that failed, climb to the underlying object, including climbing through - // ObjC-specific no-ops, and try making an imprecise alias query. - const Value *U = GetUnderlyingObjCPtr(S); - if (U != S) - return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal); - - // If that failed, fail. We don't need to chain here, since that's covered - // by the earlier precise query. - return false; -} - -AliasAnalysis::ModRefBehavior -ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { - // We have nothing to do. Just chain to the next AliasAnalysis. - return AliasAnalysis::getModRefBehavior(CS); -} - -AliasAnalysis::ModRefBehavior -ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) { - if (!EnableARCOpts) - return AliasAnalysis::getModRefBehavior(F); - - switch (GetFunctionClass(F)) { - case IC_NoopCast: - return DoesNotAccessMemory; - default: - break; - } - - return AliasAnalysis::getModRefBehavior(F); -} - -AliasAnalysis::ModRefResult -ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { - if (!EnableARCOpts) - return AliasAnalysis::getModRefInfo(CS, Loc); - - switch (GetBasicInstructionClass(CS.getInstruction())) { - case IC_Retain: - case IC_RetainRV: - case IC_Autorelease: - case IC_AutoreleaseRV: - case IC_NoopCast: - case IC_AutoreleasepoolPush: - case IC_FusedRetainAutorelease: - case IC_FusedRetainAutoreleaseRV: - // These functions don't access any memory visible to the compiler. - // Note that this doesn't include objc_retainBlock, because it updates - // pointers when it copies block data. - return NoModRef; - default: - break; - } - - return AliasAnalysis::getModRefInfo(CS, Loc); -} - -AliasAnalysis::ModRefResult -ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) { - // TODO: Theoretically we could check for dependencies between objc_* calls - // and OnlyAccessesArgumentPointees calls or other well-behaved calls. - return AliasAnalysis::getModRefInfo(CS1, CS2); -} - -//===----------------------------------------------------------------------===// -// ARC expansion. -//===----------------------------------------------------------------------===// - -#include "llvm/Support/InstIterator.h" -#include "llvm/Transforms/Scalar.h" - -namespace { - /// ObjCARCExpand - Early ARC transformations. - class ObjCARCExpand : public FunctionPass { - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool doInitialization(Module &M); - virtual bool runOnFunction(Function &F); - - /// Run - A flag indicating whether this optimization pass should run. - bool Run; - - public: - static char ID; - ObjCARCExpand() : FunctionPass(ID) { - initializeObjCARCExpandPass(*PassRegistry::getPassRegistry()); - } - }; -} - -char ObjCARCExpand::ID = 0; -INITIALIZE_PASS(ObjCARCExpand, - "objc-arc-expand", "ObjC ARC expansion", false, false) - -Pass *llvm::createObjCARCExpandPass() { - return new ObjCARCExpand(); -} - -void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); -} - -bool ObjCARCExpand::doInitialization(Module &M) { - Run = ModuleHasARC(M); - return false; -} - -bool ObjCARCExpand::runOnFunction(Function &F) { - if (!EnableARCOpts) - return false; - - // If nothing in the Module uses ARC, don't do anything. 
- if (!Run) - return false; - - bool Changed = false; - - for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { - Instruction *Inst = &*I; - - switch (GetBasicInstructionClass(Inst)) { - case IC_Retain: - case IC_RetainRV: - case IC_Autorelease: - case IC_AutoreleaseRV: - case IC_FusedRetainAutorelease: - case IC_FusedRetainAutoreleaseRV: - // These calls return their argument verbatim, as a low-level - // optimization. However, this makes high-level optimizations - // harder. Undo any uses of this optimization that the front-end - // emitted here. We'll redo them in the contract pass. - Changed = true; - Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0)); - break; - default: - break; - } - } - - return Changed; -} - -//===----------------------------------------------------------------------===// -// ARC autorelease pool elimination. -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/STLExtras.h" -#include "llvm/Constants.h" - -namespace { - /// ObjCARCAPElim - Autorelease pool elimination. - class ObjCARCAPElim : public ModulePass { - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool runOnModule(Module &M); - - static bool MayAutorelease(ImmutableCallSite CS, unsigned Depth = 0); - static bool OptimizeBB(BasicBlock *BB); - - public: - static char ID; - ObjCARCAPElim() : ModulePass(ID) { - initializeObjCARCAPElimPass(*PassRegistry::getPassRegistry()); - } - }; -} - -char ObjCARCAPElim::ID = 0; -INITIALIZE_PASS(ObjCARCAPElim, - "objc-arc-apelim", - "ObjC ARC autorelease pool elimination", - false, false) - -Pass *llvm::createObjCARCAPElimPass() { - return new ObjCARCAPElim(); -} - -void ObjCARCAPElim::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); -} - -/// MayAutorelease - Interprocedurally determine if calls made by the -/// given call site can possibly produce autoreleases. -bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) { - if (const Function *Callee = CS.getCalledFunction()) { - if (Callee->isDeclaration() || Callee->mayBeOverridden()) - return true; - for (Function::const_iterator I = Callee->begin(), E = Callee->end(); - I != E; ++I) { - const BasicBlock *BB = I; - for (BasicBlock::const_iterator J = BB->begin(), F = BB->end(); - J != F; ++J) - if (ImmutableCallSite JCS = ImmutableCallSite(J)) - // This recursion depth limit is arbitrary. It's just great - // enough to cover known interesting testcases. - if (Depth < 3 && - !JCS.onlyReadsMemory() && - MayAutorelease(JCS, Depth + 1)) - return true; - } - return false; - } - - return true; -} - -bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) { - bool Changed = false; - - Instruction *Push = 0; - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { - Instruction *Inst = I++; - switch (GetBasicInstructionClass(Inst)) { - case IC_AutoreleasepoolPush: - Push = Inst; - break; - case IC_AutoreleasepoolPop: - // If this pop matches a push and nothing in between can autorelease, - // zap the pair. - if (Push && cast<CallInst>(Inst)->getArgOperand(0) == Push) { - Changed = true; - Inst->eraseFromParent(); - Push->eraseFromParent(); - } - Push = 0; - break; - case IC_CallOrUser: - if (MayAutorelease(ImmutableCallSite(Inst))) - Push = 0; - break; - default: - break; - } - } - - return Changed; -} - -bool ObjCARCAPElim::runOnModule(Module &M) { - if (!EnableARCOpts) - return false; - - // If nothing in the Module uses ARC, don't do anything. 
- if (!ModuleHasARC(M)) - return false; - - // Find the llvm.global_ctors variable, as the first step in - // identifying the global constructors. In theory, unnecessary autorelease - // pools could occur anywhere, but in practice it's pretty rare. Global - // ctors are a place where autorelease pools get inserted automatically, - // so it's pretty common for them to be unnecessary, and it's pretty - // profitable to eliminate them. - GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); - if (!GV) - return false; - - assert(GV->hasDefinitiveInitializer() && - "llvm.global_ctors is uncooperative!"); - - bool Changed = false; - - // Dig the constructor functions out of GV's initializer. - ConstantArray *Init = cast<ConstantArray>(GV->getInitializer()); - for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end(); - OI != OE; ++OI) { - Value *Op = *OI; - // llvm.global_ctors is an array of pairs where the second members - // are constructor functions. - Function *F = dyn_cast<Function>(cast<ConstantStruct>(Op)->getOperand(1)); - // If the user used a constructor function with the wrong signature and - // it got bitcasted or whatever, look the other way. - if (!F) - continue; - // Only look at function definitions. - if (F->isDeclaration()) - continue; - // Only look at functions with one basic block. - if (llvm::next(F->begin()) != F->end()) - continue; - // Ok, a single-block constructor function definition. Try to optimize it. - Changed |= OptimizeBB(F->begin()); - } - - return Changed; -} - -//===----------------------------------------------------------------------===// -// ARC optimization. -//===----------------------------------------------------------------------===// - -// TODO: On code like this: -// -// objc_retain(%x) -// stuff_that_cannot_release() -// objc_autorelease(%x) -// stuff_that_cannot_release() -// objc_retain(%x) -// stuff_that_cannot_release() -// objc_autorelease(%x) -// -// The second retain and autorelease can be deleted. - -// TODO: It should be possible to delete -// objc_autoreleasePoolPush and objc_autoreleasePoolPop -// pairs if nothing is actually autoreleased between them. Also, autorelease -// calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code -// after inlining) can be turned into plain release calls. - -// TODO: Critical-edge splitting. If the optimial insertion point is -// a critical edge, the current algorithm has to fail, because it doesn't -// know how to split edges. It should be possible to make the optimizer -// think in terms of edges, rather than blocks, and then split critical -// edges on demand. - -// TODO: OptimizeSequences could generalized to be Interprocedural. - -// TODO: Recognize that a bunch of other objc runtime calls have -// non-escaping arguments and non-releasing arguments, and may be -// non-autoreleasing. - -// TODO: Sink autorelease calls as far as possible. Unfortunately we -// usually can't sink them past other calls, which would be the main -// case where it would be useful. - -// TODO: The pointer returned from objc_loadWeakRetained is retained. - -// TODO: Delete release+retain pairs (rare). 
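This file defines one analysis (objc-arc-aa) and three transform passes (objc-arc-expand, objc-arc-apelim, objc-arc), each exposed through a create*Pass() factory. A rough sketch of wiring the transforms into a legacy PassManager; the ordering here is illustrative rather than the canonical pipeline placement, and the declarations are assumed to come from llvm/Transforms/Scalar.h as in this era of the tree:

    #include "llvm/PassManager.h"
    #include "llvm/Transforms/Scalar.h"
    using namespace llvm;

    static void addObjCARCPasses(PassManager &PM) {
      PM.add(createObjCARCExpandPass());   // undo front-end uses of forwarding calls
      PM.add(createObjCARCAPElimPass());   // strip trivially empty autorelease pool scopes
      PM.add(createObjCARCOptPass());      // the main retain/release optimizer
    }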
- -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/LLVMContext.h" -#include "llvm/Support/CFG.h" - -STATISTIC(NumNoops, "Number of no-op objc calls eliminated"); -STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated"); -STATISTIC(NumAutoreleases,"Number of autoreleases converted to releases"); -STATISTIC(NumRets, "Number of return value forwarding " - "retain+autoreleaes eliminated"); -STATISTIC(NumRRs, "Number of retain+release paths eliminated"); -STATISTIC(NumPeeps, "Number of calls peephole-optimized"); - -namespace { - /// ProvenanceAnalysis - This is similar to BasicAliasAnalysis, and it - /// uses many of the same techniques, except it uses special ObjC-specific - /// reasoning about pointer relationships. - class ProvenanceAnalysis { - AliasAnalysis *AA; - - typedef std::pair<const Value *, const Value *> ValuePairTy; - typedef DenseMap<ValuePairTy, bool> CachedResultsTy; - CachedResultsTy CachedResults; - - bool relatedCheck(const Value *A, const Value *B); - bool relatedSelect(const SelectInst *A, const Value *B); - bool relatedPHI(const PHINode *A, const Value *B); - - void operator=(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION; - ProvenanceAnalysis(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION; - - public: - ProvenanceAnalysis() {} - - void setAA(AliasAnalysis *aa) { AA = aa; } - - AliasAnalysis *getAA() const { return AA; } - - bool related(const Value *A, const Value *B); - - void clear() { - CachedResults.clear(); - } - }; -} - -bool ProvenanceAnalysis::relatedSelect(const SelectInst *A, const Value *B) { - // If the values are Selects with the same condition, we can do a more precise - // check: just check for relations between the values on corresponding arms. - if (const SelectInst *SB = dyn_cast<SelectInst>(B)) - if (A->getCondition() == SB->getCondition()) - return related(A->getTrueValue(), SB->getTrueValue()) || - related(A->getFalseValue(), SB->getFalseValue()); - - // Check both arms of the Select node individually. - return related(A->getTrueValue(), B) || - related(A->getFalseValue(), B); -} - -bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) { - // If the values are PHIs in the same block, we can do a more precise as well - // as efficient check: just check for relations between the values on - // corresponding edges. - if (const PHINode *PNB = dyn_cast<PHINode>(B)) - if (PNB->getParent() == A->getParent()) { - for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) - if (related(A->getIncomingValue(i), - PNB->getIncomingValueForBlock(A->getIncomingBlock(i)))) - return true; - return false; - } - - // Check each unique source of the PHI node against B. - SmallPtrSet<const Value *, 4> UniqueSrc; - for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) { - const Value *PV1 = A->getIncomingValue(i); - if (UniqueSrc.insert(PV1) && related(PV1, B)) - return true; - } - - // All of the arms checked out. - return false; -} - -/// isStoredObjCPointer - Test if the value of P, or any value covered by its -/// provenance, is ever stored within the function (not counting callees). 
-static bool isStoredObjCPointer(const Value *P) { - SmallPtrSet<const Value *, 8> Visited; - SmallVector<const Value *, 8> Worklist; - Worklist.push_back(P); - Visited.insert(P); - do { - P = Worklist.pop_back_val(); - for (Value::const_use_iterator UI = P->use_begin(), UE = P->use_end(); - UI != UE; ++UI) { - const User *Ur = *UI; - if (isa<StoreInst>(Ur)) { - if (UI.getOperandNo() == 0) - // The pointer is stored. - return true; - // The pointed is stored through. - continue; - } - if (isa<CallInst>(Ur)) - // The pointer is passed as an argument, ignore this. - continue; - if (isa<PtrToIntInst>(P)) - // Assume the worst. - return true; - if (Visited.insert(Ur)) - Worklist.push_back(Ur); - } - } while (!Worklist.empty()); - - // Everything checked out. - return false; -} - -bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) { - // Skip past provenance pass-throughs. - A = GetUnderlyingObjCPtr(A); - B = GetUnderlyingObjCPtr(B); - - // Quick check. - if (A == B) - return true; - - // Ask regular AliasAnalysis, for a first approximation. - switch (AA->alias(A, B)) { - case AliasAnalysis::NoAlias: - return false; - case AliasAnalysis::MustAlias: - case AliasAnalysis::PartialAlias: - return true; - case AliasAnalysis::MayAlias: - break; - } - - bool AIsIdentified = IsObjCIdentifiedObject(A); - bool BIsIdentified = IsObjCIdentifiedObject(B); - - // An ObjC-Identified object can't alias a load if it is never locally stored. - if (AIsIdentified) { - // Check for an obvious escape. - if (isa<LoadInst>(B)) - return isStoredObjCPointer(A); - if (BIsIdentified) { - // Check for an obvious escape. - if (isa<LoadInst>(A)) - return isStoredObjCPointer(B); - // Both pointers are identified and escapes aren't an evident problem. - return false; - } - } else if (BIsIdentified) { - // Check for an obvious escape. - if (isa<LoadInst>(A)) - return isStoredObjCPointer(B); - } - - // Special handling for PHI and Select. - if (const PHINode *PN = dyn_cast<PHINode>(A)) - return relatedPHI(PN, B); - if (const PHINode *PN = dyn_cast<PHINode>(B)) - return relatedPHI(PN, A); - if (const SelectInst *S = dyn_cast<SelectInst>(A)) - return relatedSelect(S, B); - if (const SelectInst *S = dyn_cast<SelectInst>(B)) - return relatedSelect(S, A); - - // Conservative. - return true; -} - -bool ProvenanceAnalysis::related(const Value *A, const Value *B) { - // Begin by inserting a conservative value into the map. If the insertion - // fails, we have the answer already. If it succeeds, leave it there until we - // compute the real answer to guard against recursive queries. - if (A > B) std::swap(A, B); - std::pair<CachedResultsTy::iterator, bool> Pair = - CachedResults.insert(std::make_pair(ValuePairTy(A, B), true)); - if (!Pair.second) - return Pair.first->second; - - bool Result = relatedCheck(A, B); - CachedResults[ValuePairTy(A, B)] = Result; - return Result; -} - -namespace { - // Sequence - A sequence of states that a pointer may go through in which an - // objc_retain and objc_release are actually needed. - enum Sequence { - S_None, - S_Retain, ///< objc_retain(x) - S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement - S_Use, ///< any use of x - S_Stop, ///< like S_Release, but code motion is stopped - S_Release, ///< objc_release(x) - S_MovableRelease ///< objc_release(x), !clang.imprecise_release - }; -} - -static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) { - // The easy cases. 
- if (A == B) - return A; - if (A == S_None || B == S_None) - return S_None; - - if (A > B) std::swap(A, B); - if (TopDown) { - // Choose the side which is further along in the sequence. - if ((A == S_Retain || A == S_CanRelease) && - (B == S_CanRelease || B == S_Use)) - return B; - } else { - // Choose the side which is further along in the sequence. - if ((A == S_Use || A == S_CanRelease) && - (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease)) - return A; - // If both sides are releases, choose the more conservative one. - if (A == S_Stop && (B == S_Release || B == S_MovableRelease)) - return A; - if (A == S_Release && B == S_MovableRelease) - return A; - } - - return S_None; -} - -namespace { - /// RRInfo - Unidirectional information about either a - /// retain-decrement-use-release sequence or release-use-decrement-retain - /// reverese sequence. - struct RRInfo { - /// KnownSafe - After an objc_retain, the reference count of the referenced - /// object is known to be positive. Similarly, before an objc_release, the - /// reference count of the referenced object is known to be positive. If - /// there are retain-release pairs in code regions where the retain count - /// is known to be positive, they can be eliminated, regardless of any side - /// effects between them. - /// - /// Also, a retain+release pair nested within another retain+release - /// pair all on the known same pointer value can be eliminated, regardless - /// of any intervening side effects. - /// - /// KnownSafe is true when either of these conditions is satisfied. - bool KnownSafe; - - /// IsRetainBlock - True if the Calls are objc_retainBlock calls (as - /// opposed to objc_retain calls). - bool IsRetainBlock; - - /// IsTailCallRelease - True of the objc_release calls are all marked - /// with the "tail" keyword. - bool IsTailCallRelease; - - /// ReleaseMetadata - If the Calls are objc_release calls and they all have - /// a clang.imprecise_release tag, this is the metadata tag. - MDNode *ReleaseMetadata; - - /// Calls - For a top-down sequence, the set of objc_retains or - /// objc_retainBlocks. For bottom-up, the set of objc_releases. - SmallPtrSet<Instruction *, 2> Calls; - - /// ReverseInsertPts - The set of optimal insert positions for - /// moving calls in the opposite sequence. - SmallPtrSet<Instruction *, 2> ReverseInsertPts; - - RRInfo() : - KnownSafe(false), IsRetainBlock(false), - IsTailCallRelease(false), - ReleaseMetadata(0) {} - - void clear(); - }; -} - -void RRInfo::clear() { - KnownSafe = false; - IsRetainBlock = false; - IsTailCallRelease = false; - ReleaseMetadata = 0; - Calls.clear(); - ReverseInsertPts.clear(); -} - -namespace { - /// PtrState - This class summarizes several per-pointer runtime properties - /// which are propogated through the flow graph. - class PtrState { - /// KnownPositiveRefCount - True if the reference count is known to - /// be incremented. - bool KnownPositiveRefCount; - - /// Partial - True of we've seen an opportunity for partial RR elimination, - /// such as pushing calls into a CFG triangle or into one side of a - /// CFG diamond. - bool Partial; - - /// Seq - The current position in the sequence. - Sequence Seq : 8; - - public: - /// RRI - Unidirectional information about the current sequence. - /// TODO: Encapsulate this better. 
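A few concrete points of the MergeSeqs lattice above, written as assertions over the Sequence values it is defined on (a sketch, assuming it sits in the same translation unit): each direction keeps whichever compatible state is further along in that direction's sequence, release flavors merge to the more conservative one, and anything the rules cannot reconcile collapses to S_None.

    #include <cassert>

    static void mergeSeqsExamples() {
      assert(MergeSeqs(S_Retain, S_Use,            /*TopDown=*/true)  == S_Use);
      assert(MergeSeqs(S_Use,    S_Release,        /*TopDown=*/false) == S_Use);
      assert(MergeSeqs(S_Stop,   S_MovableRelease, /*TopDown=*/false) == S_Stop);
      assert(MergeSeqs(S_Retain, S_Release,        /*TopDown=*/true)  == S_None);
    }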
- RRInfo RRI; - - PtrState() : KnownPositiveRefCount(false), Partial(false), - Seq(S_None) {} - - void SetKnownPositiveRefCount() { - KnownPositiveRefCount = true; - } - - void ClearRefCount() { - KnownPositiveRefCount = false; - } - - bool IsKnownIncremented() const { - return KnownPositiveRefCount; - } - - void SetSeq(Sequence NewSeq) { - Seq = NewSeq; - } - - Sequence GetSeq() const { - return Seq; - } - - void ClearSequenceProgress() { - ResetSequenceProgress(S_None); - } - - void ResetSequenceProgress(Sequence NewSeq) { - Seq = NewSeq; - Partial = false; - RRI.clear(); - } - - void Merge(const PtrState &Other, bool TopDown); - }; -} - -void -PtrState::Merge(const PtrState &Other, bool TopDown) { - Seq = MergeSeqs(Seq, Other.Seq, TopDown); - KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount; - - // We can't merge a plain objc_retain with an objc_retainBlock. - if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock) - Seq = S_None; - - // If we're not in a sequence (anymore), drop all associated state. - if (Seq == S_None) { - Partial = false; - RRI.clear(); - } else if (Partial || Other.Partial) { - // If we're doing a merge on a path that's previously seen a partial - // merge, conservatively drop the sequence, to avoid doing partial - // RR elimination. If the branch predicates for the two merge differ, - // mixing them is unsafe. - ClearSequenceProgress(); - } else { - // Conservatively merge the ReleaseMetadata information. - if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata) - RRI.ReleaseMetadata = 0; - - RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe; - RRI.IsTailCallRelease = RRI.IsTailCallRelease && - Other.RRI.IsTailCallRelease; - RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end()); - - // Merge the insert point sets. If there are any differences, - // that makes this a partial merge. - Partial = RRI.ReverseInsertPts.size() != Other.RRI.ReverseInsertPts.size(); - for (SmallPtrSet<Instruction *, 2>::const_iterator - I = Other.RRI.ReverseInsertPts.begin(), - E = Other.RRI.ReverseInsertPts.end(); I != E; ++I) - Partial |= RRI.ReverseInsertPts.insert(*I); - } -} - -namespace { - /// BBState - Per-BasicBlock state. - class BBState { - /// TopDownPathCount - The number of unique control paths from the entry - /// which can reach this block. - unsigned TopDownPathCount; - - /// BottomUpPathCount - The number of unique control paths to exits - /// from this block. - unsigned BottomUpPathCount; - - /// MapTy - A type for PerPtrTopDown and PerPtrBottomUp. - typedef MapVector<const Value *, PtrState> MapTy; - - /// PerPtrTopDown - The top-down traversal uses this to record information - /// known about a pointer at the bottom of each block. - MapTy PerPtrTopDown; - - /// PerPtrBottomUp - The bottom-up traversal uses this to record information - /// known about a pointer at the top of each block. - MapTy PerPtrBottomUp; - - /// Preds, Succs - Effective successors and predecessors of the current - /// block (this ignores ignorable edges and ignored backedges). 
- SmallVector<BasicBlock *, 2> Preds; - SmallVector<BasicBlock *, 2> Succs; - - public: - BBState() : TopDownPathCount(0), BottomUpPathCount(0) {} - - typedef MapTy::iterator ptr_iterator; - typedef MapTy::const_iterator ptr_const_iterator; - - ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); } - ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); } - ptr_const_iterator top_down_ptr_begin() const { - return PerPtrTopDown.begin(); - } - ptr_const_iterator top_down_ptr_end() const { - return PerPtrTopDown.end(); - } - - ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); } - ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); } - ptr_const_iterator bottom_up_ptr_begin() const { - return PerPtrBottomUp.begin(); - } - ptr_const_iterator bottom_up_ptr_end() const { - return PerPtrBottomUp.end(); - } - - /// SetAsEntry - Mark this block as being an entry block, which has one - /// path from the entry by definition. - void SetAsEntry() { TopDownPathCount = 1; } - - /// SetAsExit - Mark this block as being an exit block, which has one - /// path to an exit by definition. - void SetAsExit() { BottomUpPathCount = 1; } - - PtrState &getPtrTopDownState(const Value *Arg) { - return PerPtrTopDown[Arg]; - } - - PtrState &getPtrBottomUpState(const Value *Arg) { - return PerPtrBottomUp[Arg]; - } - - void clearBottomUpPointers() { - PerPtrBottomUp.clear(); - } - - void clearTopDownPointers() { - PerPtrTopDown.clear(); - } - - void InitFromPred(const BBState &Other); - void InitFromSucc(const BBState &Other); - void MergePred(const BBState &Other); - void MergeSucc(const BBState &Other); - - /// GetAllPathCount - Return the number of possible unique paths from an - /// entry to an exit which pass through this block. This is only valid - /// after both the top-down and bottom-up traversals are complete. - unsigned GetAllPathCount() const { - assert(TopDownPathCount != 0); - assert(BottomUpPathCount != 0); - return TopDownPathCount * BottomUpPathCount; - } - - // Specialized CFG utilities. - typedef SmallVectorImpl<BasicBlock *>::const_iterator edge_iterator; - edge_iterator pred_begin() { return Preds.begin(); } - edge_iterator pred_end() { return Preds.end(); } - edge_iterator succ_begin() { return Succs.begin(); } - edge_iterator succ_end() { return Succs.end(); } - - void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); } - void addPred(BasicBlock *Pred) { Preds.push_back(Pred); } - - bool isExit() const { return Succs.empty(); } - }; -} - -void BBState::InitFromPred(const BBState &Other) { - PerPtrTopDown = Other.PerPtrTopDown; - TopDownPathCount = Other.TopDownPathCount; -} - -void BBState::InitFromSucc(const BBState &Other) { - PerPtrBottomUp = Other.PerPtrBottomUp; - BottomUpPathCount = Other.BottomUpPathCount; -} - -/// MergePred - The top-down traversal uses this to merge information about -/// predecessors to form the initial state for a new block. -void BBState::MergePred(const BBState &Other) { - // Other.TopDownPathCount can be 0, in which case it is either dead or a - // loop backedge. Loop backedges are special. - TopDownPathCount += Other.TopDownPathCount; - - // Check for overflow. If we have overflow, fall back to conservative behavior. - if (TopDownPathCount < Other.TopDownPathCount) { - clearTopDownPointers(); - return; - } - - // For each entry in the other set, if our set has an entry with the same key, - // merge the entries. Otherwise, copy the entry and merge it with an empty - // entry. 
- for (ptr_const_iterator MI = Other.top_down_ptr_begin(), - ME = Other.top_down_ptr_end(); MI != ME; ++MI) { - std::pair<ptr_iterator, bool> Pair = PerPtrTopDown.insert(*MI); - Pair.first->second.Merge(Pair.second ? PtrState() : MI->second, - /*TopDown=*/true); - } - - // For each entry in our set, if the other set doesn't have an entry with the - // same key, force it to merge with an empty entry. - for (ptr_iterator MI = top_down_ptr_begin(), - ME = top_down_ptr_end(); MI != ME; ++MI) - if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end()) - MI->second.Merge(PtrState(), /*TopDown=*/true); -} - -/// MergeSucc - The bottom-up traversal uses this to merge information about -/// successors to form the initial state for a new block. -void BBState::MergeSucc(const BBState &Other) { - // Other.BottomUpPathCount can be 0, in which case it is either dead or a - // loop backedge. Loop backedges are special. - BottomUpPathCount += Other.BottomUpPathCount; - - // Check for overflow. If we have overflow, fall back to conservative behavior. - if (BottomUpPathCount < Other.BottomUpPathCount) { - clearBottomUpPointers(); - return; - } - - // For each entry in the other set, if our set has an entry with the - // same key, merge the entries. Otherwise, copy the entry and merge - // it with an empty entry. - for (ptr_const_iterator MI = Other.bottom_up_ptr_begin(), - ME = Other.bottom_up_ptr_end(); MI != ME; ++MI) { - std::pair<ptr_iterator, bool> Pair = PerPtrBottomUp.insert(*MI); - Pair.first->second.Merge(Pair.second ? PtrState() : MI->second, - /*TopDown=*/false); - } - - // For each entry in our set, if the other set doesn't have an entry - // with the same key, force it to merge with an empty entry. - for (ptr_iterator MI = bottom_up_ptr_begin(), - ME = bottom_up_ptr_end(); MI != ME; ++MI) - if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end()) - MI->second.Merge(PtrState(), /*TopDown=*/false); -} - -namespace { - /// ObjCARCOpt - The main ARC optimization pass. - class ObjCARCOpt : public FunctionPass { - bool Changed; - ProvenanceAnalysis PA; - - /// Run - A flag indicating whether this optimization pass should run. - bool Run; - - /// RetainRVCallee, etc. - Declarations for ObjC runtime - /// functions, for use in creating calls to them. These are initialized - /// lazily to avoid cluttering up the Module with unused declarations. - Constant *RetainRVCallee, *AutoreleaseRVCallee, *ReleaseCallee, - *RetainCallee, *RetainBlockCallee, *AutoreleaseCallee; - - /// UsedInThisFunciton - Flags which determine whether each of the - /// interesting runtine functions is in fact used in the current function. - unsigned UsedInThisFunction; - - /// ImpreciseReleaseMDKind - The Metadata Kind for clang.imprecise_release - /// metadata. - unsigned ImpreciseReleaseMDKind; - - /// CopyOnEscapeMDKind - The Metadata Kind for clang.arc.copy_on_escape - /// metadata. - unsigned CopyOnEscapeMDKind; - - /// NoObjCARCExceptionsMDKind - The Metadata Kind for - /// clang.arc.no_objc_arc_exceptions metadata. 
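A small worked example of the path counts behind GetAllPathCount above, using an if/else diamond. This is a sketch assumed to live in the same translation unit as BBState; only the counters are exercised, the per-pointer maps stay empty, and merging into zero-initialized blocks yields the same counts as seeding the first predecessor or successor with InitFromPred/InitFromSucc.

    #include <cassert>

    static void pathCountExample() {
      BBState Entry, Then, Else, Exit;      // entry -> {then, else} -> exit
      Entry.SetAsEntry();                   // one path from the function entry
      Then.MergePred(Entry);
      Else.MergePred(Entry);
      Exit.MergePred(Then);
      Exit.MergePred(Else);                 // two distinct top-down paths reach Exit
      Exit.SetAsExit();                     // one path to an exit
      Then.MergeSucc(Exit);
      Else.MergeSucc(Exit);
      Entry.MergeSucc(Then);
      Entry.MergeSucc(Else);                // two distinct bottom-up paths leave Entry
      assert(Then.GetAllPathCount() == 1);  // entry->then->exit is the only such path
      assert(Exit.GetAllPathCount() == 2);  // every entry-to-exit path passes through Exit
    }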
- unsigned NoObjCARCExceptionsMDKind; - - Constant *getRetainRVCallee(Module *M); - Constant *getAutoreleaseRVCallee(Module *M); - Constant *getReleaseCallee(Module *M); - Constant *getRetainCallee(Module *M); - Constant *getRetainBlockCallee(Module *M); - Constant *getAutoreleaseCallee(Module *M); - - bool IsRetainBlockOptimizable(const Instruction *Inst); - - void OptimizeRetainCall(Function &F, Instruction *Retain); - bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV); - void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV); - void OptimizeIndividualCalls(Function &F); - - void CheckForCFGHazards(const BasicBlock *BB, - DenseMap<const BasicBlock *, BBState> &BBStates, - BBState &MyStates) const; - bool VisitInstructionBottomUp(Instruction *Inst, - BasicBlock *BB, - MapVector<Value *, RRInfo> &Retains, - BBState &MyStates); - bool VisitBottomUp(BasicBlock *BB, - DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains); - bool VisitInstructionTopDown(Instruction *Inst, - DenseMap<Value *, RRInfo> &Releases, - BBState &MyStates); - bool VisitTopDown(BasicBlock *BB, - DenseMap<const BasicBlock *, BBState> &BBStates, - DenseMap<Value *, RRInfo> &Releases); - bool Visit(Function &F, - DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases); - - void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases, - SmallVectorImpl<Instruction *> &DeadInsts, - Module *M); - - bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases, - Module *M); - - void OptimizeWeakCalls(Function &F); - - bool OptimizeSequences(Function &F); - - void OptimizeReturns(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool doInitialization(Module &M); - virtual bool runOnFunction(Function &F); - virtual void releaseMemory(); - - public: - static char ID; - ObjCARCOpt() : FunctionPass(ID) { - initializeObjCARCOptPass(*PassRegistry::getPassRegistry()); - } - }; -} - -char ObjCARCOpt::ID = 0; -INITIALIZE_PASS_BEGIN(ObjCARCOpt, - "objc-arc", "ObjC ARC optimization", false, false) -INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis) -INITIALIZE_PASS_END(ObjCARCOpt, - "objc-arc", "ObjC ARC optimization", false, false) - -Pass *llvm::createObjCARCOptPass() { - return new ObjCARCOpt(); -} - -void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<ObjCARCAliasAnalysis>(); - AU.addRequired<AliasAnalysis>(); - // ARC optimization doesn't currently split critical edges. - AU.setPreservesCFG(); -} - -bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) { - // Without the magic metadata tag, we have to assume this might be an - // objc_retainBlock call inserted to convert a block pointer to an id, - // in which case it really is needed. - if (!Inst->getMetadata(CopyOnEscapeMDKind)) - return false; - - // If the pointer "escapes" (not including being used in a call), - // the copy may be needed. - if (DoesObjCBlockEscape(Inst)) - return false; - - // Otherwise, it's not needed. 
- return true; -} - -Constant *ObjCARCOpt::getRetainRVCallee(Module *M) { - if (!RetainRVCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *Params[] = { I8X }; - FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttributeSet Attributes = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attributes::get(C, Attributes::NoUnwind)); - RetainRVCallee = - M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy, - Attributes); - } - return RetainRVCallee; -} - -Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { - if (!AutoreleaseRVCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *Params[] = { I8X }; - FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttributeSet Attributes = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attributes::get(C, Attributes::NoUnwind)); - AutoreleaseRVCallee = - M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy, - Attributes); - } - return AutoreleaseRVCallee; -} - -Constant *ObjCARCOpt::getReleaseCallee(Module *M) { - if (!ReleaseCallee) { - LLVMContext &C = M->getContext(); - Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - AttributeSet Attributes = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attributes::get(C, Attributes::NoUnwind)); - ReleaseCallee = - M->getOrInsertFunction( - "objc_release", - FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false), - Attributes); - } - return ReleaseCallee; -} - -Constant *ObjCARCOpt::getRetainCallee(Module *M) { - if (!RetainCallee) { - LLVMContext &C = M->getContext(); - Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - AttributeSet Attributes = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attributes::get(C, Attributes::NoUnwind)); - RetainCallee = - M->getOrInsertFunction( - "objc_retain", - FunctionType::get(Params[0], Params, /*isVarArg=*/false), - Attributes); - } - return RetainCallee; -} - -Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) { - if (!RetainBlockCallee) { - LLVMContext &C = M->getContext(); - Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - // objc_retainBlock is not nounwind because it calls user copy constructors - // which could theoretically throw. - RetainBlockCallee = - M->getOrInsertFunction( - "objc_retainBlock", - FunctionType::get(Params[0], Params, /*isVarArg=*/false), - AttributeSet()); - } - return RetainBlockCallee; -} - -Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { - if (!AutoreleaseCallee) { - LLVMContext &C = M->getContext(); - Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - AttributeSet Attributes = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attributes::get(C, Attributes::NoUnwind)); - AutoreleaseCallee = - M->getOrInsertFunction( - "objc_autorelease", - FunctionType::get(Params[0], Params, /*isVarArg=*/false), - Attributes); - } - return AutoreleaseCallee; -} - -/// IsPotentialUse - Test whether the given value is possible a -/// reference-counted pointer, including tests which utilize AliasAnalysis. -static bool IsPotentialUse(const Value *Op, AliasAnalysis &AA) { - // First make the rudimentary check. - if (!IsPotentialUse(Op)) - return false; - - // Objects in constant memory are not reference-counted. 
- if (AA.pointsToConstantMemory(Op)) - return false; - - // Pointers in constant memory are not pointing to reference-counted objects. - if (const LoadInst *LI = dyn_cast<LoadInst>(Op)) - if (AA.pointsToConstantMemory(LI->getPointerOperand())) - return false; - - // Otherwise assume the worst. - return true; -} - -/// CanAlterRefCount - Test whether the given instruction can result in a -/// reference count modification (positive or negative) for the pointer's -/// object. -static bool -CanAlterRefCount(const Instruction *Inst, const Value *Ptr, - ProvenanceAnalysis &PA, InstructionClass Class) { - switch (Class) { - case IC_Autorelease: - case IC_AutoreleaseRV: - case IC_User: - // These operations never directly modify a reference count. - return false; - default: break; - } - - ImmutableCallSite CS = static_cast<const Value *>(Inst); - assert(CS && "Only calls can alter reference counts!"); - - // See if AliasAnalysis can help us with the call. - AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS); - if (AliasAnalysis::onlyReadsMemory(MRB)) - return false; - if (AliasAnalysis::onlyAccessesArgPointees(MRB)) { - for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); - I != E; ++I) { - const Value *Op = *I; - if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) - return true; - } - return false; - } - - // Assume the worst. - return true; -} - -/// CanUse - Test whether the given instruction can "use" the given pointer's -/// object in a way that requires the reference count to be positive. -static bool -CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, - InstructionClass Class) { - // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc pointers. - if (Class == IC_Call) - return false; - - // Consider various instructions which may have pointer arguments which are - // not "uses". - if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Inst)) { - // Comparing a pointer with null, or any other constant, isn't really a use, - // because we don't care what the pointer points to, or about the values - // of any other dynamic reference-counted pointers. - if (!IsPotentialUse(ICI->getOperand(1), *PA.getAA())) - return false; - } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) { - // For calls, just check the arguments (and not the callee operand). - for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(), - OE = CS.arg_end(); OI != OE; ++OI) { - const Value *Op = *OI; - if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) - return true; - } - return false; - } else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - // Special-case stores, because we don't care about the stored value, just - // the store address. - const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand()); - // If we can't tell what the underlying object was, assume there is a - // dependence. - return IsPotentialUse(Op, *PA.getAA()) && PA.related(Op, Ptr); - } - - // Check each operand for a match. - for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end(); - OI != OE; ++OI) { - const Value *Op = *OI; - if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) - return true; - } - return false; -} - -/// CanInterruptRV - Test whether the given instruction can autorelease -/// any pointer or cause an autoreleasepool pop. 
-static bool -CanInterruptRV(InstructionClass Class) { - switch (Class) { - case IC_AutoreleasepoolPop: - case IC_CallOrUser: - case IC_Call: - case IC_Autorelease: - case IC_AutoreleaseRV: - case IC_FusedRetainAutorelease: - case IC_FusedRetainAutoreleaseRV: - return true; - default: - return false; - } -} - -namespace { - /// DependenceKind - There are several kinds of dependence-like concepts in - /// use here. - enum DependenceKind { - NeedsPositiveRetainCount, - AutoreleasePoolBoundary, - CanChangeRetainCount, - RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease. - RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue. - RetainRVDep ///< Blocks objc_retainAutoreleasedReturnValue. - }; -} - -/// Depends - Test if there can be dependencies on Inst through Arg. This -/// function only tests dependencies relevant for removing pairs of calls. -static bool -Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg, - ProvenanceAnalysis &PA) { - // If we've reached the definition of Arg, stop. - if (Inst == Arg) - return true; - - switch (Flavor) { - case NeedsPositiveRetainCount: { - InstructionClass Class = GetInstructionClass(Inst); - switch (Class) { - case IC_AutoreleasepoolPop: - case IC_AutoreleasepoolPush: - case IC_None: - return false; - default: - return CanUse(Inst, Arg, PA, Class); - } - } - - case AutoreleasePoolBoundary: { - InstructionClass Class = GetInstructionClass(Inst); - switch (Class) { - case IC_AutoreleasepoolPop: - case IC_AutoreleasepoolPush: - // These mark the end and begin of an autorelease pool scope. - return true; - default: - // Nothing else does this. - return false; - } - } - - case CanChangeRetainCount: { - InstructionClass Class = GetInstructionClass(Inst); - switch (Class) { - case IC_AutoreleasepoolPop: - // Conservatively assume this can decrement any count. - return true; - case IC_AutoreleasepoolPush: - case IC_None: - return false; - default: - return CanAlterRefCount(Inst, Arg, PA, Class); - } - } - - case RetainAutoreleaseDep: - switch (GetBasicInstructionClass(Inst)) { - case IC_AutoreleasepoolPop: - case IC_AutoreleasepoolPush: - // Don't merge an objc_autorelease with an objc_retain inside a different - // autoreleasepool scope. - return true; - case IC_Retain: - case IC_RetainRV: - // Check for a retain of the same pointer for merging. - return GetObjCArg(Inst) == Arg; - default: - // Nothing else matters for objc_retainAutorelease formation. - return false; - } - - case RetainAutoreleaseRVDep: { - InstructionClass Class = GetBasicInstructionClass(Inst); - switch (Class) { - case IC_Retain: - case IC_RetainRV: - // Check for a retain of the same pointer for merging. - return GetObjCArg(Inst) == Arg; - default: - // Anything that can autorelease interrupts - // retainAutoreleaseReturnValue formation. - return CanInterruptRV(Class); - } - } - - case RetainRVDep: - return CanInterruptRV(GetBasicInstructionClass(Inst)); - } - - llvm_unreachable("Invalid dependence flavor"); -} - -/// FindDependencies - Walk up the CFG from StartPos (which is in StartBB) and -/// find local and non-local dependencies on Arg. -/// TODO: Cache results? 
-static void -FindDependencies(DependenceKind Flavor, - const Value *Arg, - BasicBlock *StartBB, Instruction *StartInst, - SmallPtrSet<Instruction *, 4> &DependingInstructions, - SmallPtrSet<const BasicBlock *, 4> &Visited, - ProvenanceAnalysis &PA) { - BasicBlock::iterator StartPos = StartInst; - - SmallVector<std::pair<BasicBlock *, BasicBlock::iterator>, 4> Worklist; - Worklist.push_back(std::make_pair(StartBB, StartPos)); - do { - std::pair<BasicBlock *, BasicBlock::iterator> Pair = - Worklist.pop_back_val(); - BasicBlock *LocalStartBB = Pair.first; - BasicBlock::iterator LocalStartPos = Pair.second; - BasicBlock::iterator StartBBBegin = LocalStartBB->begin(); - for (;;) { - if (LocalStartPos == StartBBBegin) { - pred_iterator PI(LocalStartBB), PE(LocalStartBB, false); - if (PI == PE) - // If we've reached the function entry, produce a null dependence. - DependingInstructions.insert(0); - else - // Add the predecessors to the worklist. - do { - BasicBlock *PredBB = *PI; - if (Visited.insert(PredBB)) - Worklist.push_back(std::make_pair(PredBB, PredBB->end())); - } while (++PI != PE); - break; - } - - Instruction *Inst = --LocalStartPos; - if (Depends(Flavor, Inst, Arg, PA)) { - DependingInstructions.insert(Inst); - break; - } - } - } while (!Worklist.empty()); - - // Determine whether the original StartBB post-dominates all of the blocks we - // visited. If not, insert a sentinal indicating that most optimizations are - // not safe. - for (SmallPtrSet<const BasicBlock *, 4>::const_iterator I = Visited.begin(), - E = Visited.end(); I != E; ++I) { - const BasicBlock *BB = *I; - if (BB == StartBB) - continue; - const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); - for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) { - const BasicBlock *Succ = *SI; - if (Succ != StartBB && !Visited.count(Succ)) { - DependingInstructions.insert(reinterpret_cast<Instruction *>(-1)); - return; - } - } - } -} - -static bool isNullOrUndef(const Value *V) { - return isa<ConstantPointerNull>(V) || isa<UndefValue>(V); -} - -static bool isNoopInstruction(const Instruction *I) { - return isa<BitCastInst>(I) || - (isa<GetElementPtrInst>(I) && - cast<GetElementPtrInst>(I)->hasAllZeroIndices()); -} - -/// OptimizeRetainCall - Turn objc_retain into -/// objc_retainAutoreleasedReturnValue if the operand is a return value. -void -ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) { - ImmutableCallSite CS(GetObjCArg(Retain)); - const Instruction *Call = CS.getInstruction(); - if (!Call) return; - if (Call->getParent() != Retain->getParent()) return; - - // Check that the call is next to the retain. - BasicBlock::const_iterator I = Call; - ++I; - while (isNoopInstruction(I)) ++I; - if (&*I != Retain) - return; - - // Turn it to an objc_retainAutoreleasedReturnValue.. - Changed = true; - ++NumPeeps; - cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent())); -} - -/// OptimizeRetainRVCall - Turn objc_retainAutoreleasedReturnValue into -/// objc_retain if the operand is not a return value. Or, if it can be paired -/// with an objc_autoreleaseReturnValue, delete the pair and return true. -bool -ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { - // Check for the argument being from an immediately preceding call or invoke. 
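The dependency search above (FindDependencies) is a plain backward worklist walk over the CFG: scan instructions in reverse from the start point, and once the top of a block is reached, queue each unvisited predecessor. A minimal standalone sketch of the same pattern, using only the standard library and hypothetical names rather than the LLVM API (C++17):

// Sketch of a backward worklist walk: collect the first "depending"
// instruction on each path leading to the start point.
#include <set>
#include <utility>
#include <vector>

struct Block {
  std::vector<int> Insts;       // toy "instructions"
  std::vector<Block *> Preds;   // predecessor blocks
};

template <typename PredT>
void findDependencies(Block *StartBB, int StartIdx, PredT Pred,
                      std::set<std::pair<Block *, int>> &Depending) {
  std::set<Block *> Visited;
  std::vector<std::pair<Block *, int>> Worklist{{StartBB, StartIdx}};
  while (!Worklist.empty()) {
    auto [BB, Idx] = Worklist.back();
    Worklist.pop_back();
    bool Found = false;
    for (int I = Idx - 1; I >= 0; --I)
      if (Pred(BB->Insts[I])) {       // first dependence on this path
        Depending.insert({BB, I});
        Found = true;
        break;
      }
    if (!Found)
      for (Block *P : BB->Preds)      // keep walking up the CFG
        if (Visited.insert(P).second)
          Worklist.push_back({P, (int)P->Insts.size()});
  }
}

The real routine additionally records a null sentinel when the walk reaches the function entry and a poison sentinel when the start block does not post-dominate every visited block.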
- const Value *Arg = GetObjCArg(RetainRV); - ImmutableCallSite CS(Arg); - if (const Instruction *Call = CS.getInstruction()) { - if (Call->getParent() == RetainRV->getParent()) { - BasicBlock::const_iterator I = Call; - ++I; - while (isNoopInstruction(I)) ++I; - if (&*I == RetainRV) - return false; - } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) { - BasicBlock *RetainRVParent = RetainRV->getParent(); - if (II->getNormalDest() == RetainRVParent) { - BasicBlock::const_iterator I = RetainRVParent->begin(); - while (isNoopInstruction(I)) ++I; - if (&*I == RetainRV) - return false; - } - } - } - - // Check for being preceded by an objc_autoreleaseReturnValue on the same - // pointer. In this case, we can delete the pair. - BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin(); - if (I != Begin) { - do --I; while (I != Begin && isNoopInstruction(I)); - if (GetBasicInstructionClass(I) == IC_AutoreleaseRV && - GetObjCArg(I) == Arg) { - Changed = true; - ++NumPeeps; - EraseInstruction(I); - EraseInstruction(RetainRV); - return true; - } - } - - // Turn it to a plain objc_retain. - Changed = true; - ++NumPeeps; - cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent())); - return false; -} - -/// OptimizeAutoreleaseRVCall - Turn objc_autoreleaseReturnValue into -/// objc_autorelease if the result is not used as a return value. -void -ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV) { - // Check for a return of the pointer value. - const Value *Ptr = GetObjCArg(AutoreleaseRV); - SmallVector<const Value *, 2> Users; - Users.push_back(Ptr); - do { - Ptr = Users.pop_back_val(); - for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end(); - UI != UE; ++UI) { - const User *I = *UI; - if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV) - return; - if (isa<BitCastInst>(I)) - Users.push_back(I); - } - } while (!Users.empty()); - - Changed = true; - ++NumPeeps; - cast<CallInst>(AutoreleaseRV)-> - setCalledFunction(getAutoreleaseCallee(F.getParent())); -} - -/// OptimizeIndividualCalls - Visit each call, one at a time, and make -/// simplifications without doing any additional analysis. -void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { - // Reset all the flags in preparation for recomputing them. - UsedInThisFunction = 0; - - // Visit all objc_* calls in F. - for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { - Instruction *Inst = &*I++; - InstructionClass Class = GetBasicInstructionClass(Inst); - - switch (Class) { - default: break; - - // Delete no-op casts. These function calls have special semantics, but - // the semantics are entirely implemented via lowering in the front-end, - // so by the time they reach the optimizer, they are just no-op calls - // which return their argument. - // - // There are gray areas here, as the ability to cast reference-counted - // pointers to raw void* and back allows code to break ARC assumptions, - // however these are currently considered to be unimportant. - case IC_NoopCast: - Changed = true; - ++NumNoops; - EraseInstruction(Inst); - continue; - - // If the pointer-to-weak-pointer is null, it's undefined behavior. 
- case IC_StoreWeak: - case IC_LoadWeak: - case IC_LoadWeakRetained: - case IC_InitWeak: - case IC_DestroyWeak: { - CallInst *CI = cast<CallInst>(Inst); - if (isNullOrUndef(CI->getArgOperand(0))) { - Changed = true; - Type *Ty = CI->getArgOperand(0)->getType(); - new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), - Constant::getNullValue(Ty), - CI); - CI->replaceAllUsesWith(UndefValue::get(CI->getType())); - CI->eraseFromParent(); - continue; - } - break; - } - case IC_CopyWeak: - case IC_MoveWeak: { - CallInst *CI = cast<CallInst>(Inst); - if (isNullOrUndef(CI->getArgOperand(0)) || - isNullOrUndef(CI->getArgOperand(1))) { - Changed = true; - Type *Ty = CI->getArgOperand(0)->getType(); - new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), - Constant::getNullValue(Ty), - CI); - CI->replaceAllUsesWith(UndefValue::get(CI->getType())); - CI->eraseFromParent(); - continue; - } - break; - } - case IC_Retain: - OptimizeRetainCall(F, Inst); - break; - case IC_RetainRV: - if (OptimizeRetainRVCall(F, Inst)) - continue; - break; - case IC_AutoreleaseRV: - OptimizeAutoreleaseRVCall(F, Inst); - break; - } - - // objc_autorelease(x) -> objc_release(x) if x is otherwise unused. - if (IsAutorelease(Class) && Inst->use_empty()) { - CallInst *Call = cast<CallInst>(Inst); - const Value *Arg = Call->getArgOperand(0); - Arg = FindSingleUseIdentifiedObject(Arg); - if (Arg) { - Changed = true; - ++NumAutoreleases; - - // Create the declaration lazily. - LLVMContext &C = Inst->getContext(); - CallInst *NewCall = - CallInst::Create(getReleaseCallee(F.getParent()), - Call->getArgOperand(0), "", Call); - NewCall->setMetadata(ImpreciseReleaseMDKind, - MDNode::get(C, ArrayRef<Value *>())); - EraseInstruction(Call); - Inst = NewCall; - Class = IC_Release; - } - } - - // For functions which can never be passed stack arguments, add - // a tail keyword. - if (IsAlwaysTail(Class)) { - Changed = true; - cast<CallInst>(Inst)->setTailCall(); - } - - // Set nounwind as needed. - if (IsNoThrow(Class)) { - Changed = true; - cast<CallInst>(Inst)->setDoesNotThrow(); - } - - if (!IsNoopOnNull(Class)) { - UsedInThisFunction |= 1 << Class; - continue; - } - - const Value *Arg = GetObjCArg(Inst); - - // ARC calls with null are no-ops. Delete them. - if (isNullOrUndef(Arg)) { - Changed = true; - ++NumNoops; - EraseInstruction(Inst); - continue; - } - - // Keep track of which of retain, release, autorelease, and retain_block - // are actually present in this function. - UsedInThisFunction |= 1 << Class; - - // If Arg is a PHI, and one or more incoming values to the - // PHI are null, and the call is control-equivalent to the PHI, and there - // are no relevant side effects between the PHI and the call, the call - // could be pushed up to just those paths with non-null incoming values. - // For now, don't bother splitting critical edges for this. - SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist; - Worklist.push_back(std::make_pair(Inst, Arg)); - do { - std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val(); - Inst = Pair.first; - Arg = Pair.second; - - const PHINode *PN = dyn_cast<PHINode>(Arg); - if (!PN) continue; - - // Determine if the PHI has any null operands, or any incoming - // critical edges. 
- bool HasNull = false; - bool HasCriticalEdges = false; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *Incoming = - StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); - if (isNullOrUndef(Incoming)) - HasNull = true; - else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back()) - .getNumSuccessors() != 1) { - HasCriticalEdges = true; - break; - } - } - // If we have null operands and no critical edges, optimize. - if (!HasCriticalEdges && HasNull) { - SmallPtrSet<Instruction *, 4> DependingInstructions; - SmallPtrSet<const BasicBlock *, 4> Visited; - - // Check that there is nothing that cares about the reference - // count between the call and the phi. - switch (Class) { - case IC_Retain: - case IC_RetainBlock: - // These can always be moved up. - break; - case IC_Release: - // These can't be moved across things that care about the retain - // count. - FindDependencies(NeedsPositiveRetainCount, Arg, - Inst->getParent(), Inst, - DependingInstructions, Visited, PA); - break; - case IC_Autorelease: - // These can't be moved across autorelease pool scope boundaries. - FindDependencies(AutoreleasePoolBoundary, Arg, - Inst->getParent(), Inst, - DependingInstructions, Visited, PA); - break; - case IC_RetainRV: - case IC_AutoreleaseRV: - // Don't move these; the RV optimization depends on the autoreleaseRV - // being tail called, and the retainRV being immediately after a call - // (which might still happen if we get lucky with codegen layout, but - // it's not worth taking the chance). - continue; - default: - llvm_unreachable("Invalid dependence flavor"); - } - - if (DependingInstructions.size() == 1 && - *DependingInstructions.begin() == PN) { - Changed = true; - ++NumPartialNoops; - // Clone the call into each predecessor that has a non-null value. - CallInst *CInst = cast<CallInst>(Inst); - Type *ParamTy = CInst->getArgOperand(0)->getType(); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *Incoming = - StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); - if (!isNullOrUndef(Incoming)) { - CallInst *Clone = cast<CallInst>(CInst->clone()); - Value *Op = PN->getIncomingValue(i); - Instruction *InsertPos = &PN->getIncomingBlock(i)->back(); - if (Op->getType() != ParamTy) - Op = new BitCastInst(Op, ParamTy, "", InsertPos); - Clone->setArgOperand(0, Op); - Clone->insertBefore(InsertPos); - Worklist.push_back(std::make_pair(Clone, Incoming)); - } - } - // Erase the original call. - EraseInstruction(CInst); - continue; - } - } - } while (!Worklist.empty()); - } -} - -/// CheckForCFGHazards - Check for critical edges, loop boundaries, irreducible -/// control flow, or other CFG structures where moving code across the edge -/// would result in it being executed more. -void -ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, - DenseMap<const BasicBlock *, BBState> &BBStates, - BBState &MyStates) const { - // If any top-down local-use or possible-dec has a succ which is earlier in - // the sequence, forget it. 
- for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(), - E = MyStates.top_down_ptr_end(); I != E; ++I) - switch (I->second.GetSeq()) { - default: break; - case S_Use: { - const Value *Arg = I->first; - const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); - bool SomeSuccHasSame = false; - bool AllSuccsHaveSame = true; - PtrState &S = I->second; - succ_const_iterator SI(TI), SE(TI, false); - - // If the terminator is an invoke marked with the - // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be - // ignored, for ARC purposes. - if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) - --SE; - - for (; SI != SE; ++SI) { - Sequence SuccSSeq = S_None; - bool SuccSRRIKnownSafe = false; - // If VisitBottomUp has pointer information for this successor, take - // what we know about it. - DenseMap<const BasicBlock *, BBState>::iterator BBI = - BBStates.find(*SI); - assert(BBI != BBStates.end()); - const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); - SuccSSeq = SuccS.GetSeq(); - SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; - switch (SuccSSeq) { - case S_None: - case S_CanRelease: { - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) { - S.ClearSequenceProgress(); - break; - } - continue; - } - case S_Use: - SomeSuccHasSame = true; - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) - AllSuccsHaveSame = false; - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - } - // If the state at the other end of any of the successor edges - // matches the current state, require all edges to match. This - // guards against loops in the middle of a sequence. - if (SomeSuccHasSame && !AllSuccsHaveSame) - S.ClearSequenceProgress(); - break; - } - case S_CanRelease: { - const Value *Arg = I->first; - const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); - bool SomeSuccHasSame = false; - bool AllSuccsHaveSame = true; - PtrState &S = I->second; - succ_const_iterator SI(TI), SE(TI, false); - - // If the terminator is an invoke marked with the - // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be - // ignored, for ARC purposes. - if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) - --SE; - - for (; SI != SE; ++SI) { - Sequence SuccSSeq = S_None; - bool SuccSRRIKnownSafe = false; - // If VisitBottomUp has pointer information for this successor, take - // what we know about it. - DenseMap<const BasicBlock *, BBState>::iterator BBI = - BBStates.find(*SI); - assert(BBI != BBStates.end()); - const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); - SuccSSeq = SuccS.GetSeq(); - SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; - switch (SuccSSeq) { - case S_None: { - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) { - S.ClearSequenceProgress(); - break; - } - continue; - } - case S_CanRelease: - SomeSuccHasSame = true; - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - case S_Use: - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) - AllSuccsHaveSame = false; - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - } - // If the state at the other end of any of the successor edges - // matches the current state, require all edges to match. This - // guards against loops in the middle of a sequence. 
- if (SomeSuccHasSame && !AllSuccsHaveSame) - S.ClearSequenceProgress(); - break; - } - } -} - -bool -ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, - BasicBlock *BB, - MapVector<Value *, RRInfo> &Retains, - BBState &MyStates) { - bool NestingDetected = false; - InstructionClass Class = GetInstructionClass(Inst); - const Value *Arg = 0; - - switch (Class) { - case IC_Release: { - Arg = GetObjCArg(Inst); - - PtrState &S = MyStates.getPtrBottomUpState(Arg); - - // If we see two releases in a row on the same pointer. If so, make - // a note, and we'll cicle back to revisit it after we've - // hopefully eliminated the second release, which may allow us to - // eliminate the first release too. - // Theoretically we could implement removal of nested retain+release - // pairs by making PtrState hold a stack of states, but this is - // simple and avoids adding overhead for the non-nested case. - if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) - NestingDetected = true; - - MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); - S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release); - S.RRI.ReleaseMetadata = ReleaseMetadata; - S.RRI.KnownSafe = S.IsKnownIncremented(); - S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); - S.RRI.Calls.insert(Inst); - - S.SetKnownPositiveRefCount(); - break; - } - case IC_RetainBlock: - // An objc_retainBlock call with just a use may need to be kept, - // because it may be copying a block from the stack to the heap. - if (!IsRetainBlockOptimizable(Inst)) - break; - // FALLTHROUGH - case IC_Retain: - case IC_RetainRV: { - Arg = GetObjCArg(Inst); - - PtrState &S = MyStates.getPtrBottomUpState(Arg); - S.SetKnownPositiveRefCount(); - - switch (S.GetSeq()) { - case S_Stop: - case S_Release: - case S_MovableRelease: - case S_Use: - S.RRI.ReverseInsertPts.clear(); - // FALL THROUGH - case S_CanRelease: - // Don't do retain+release tracking for IC_RetainRV, because it's - // better to let it remain as the first instruction after a call. - if (Class != IC_RetainRV) { - S.RRI.IsRetainBlock = Class == IC_RetainBlock; - Retains[Inst] = S.RRI; - } - S.ClearSequenceProgress(); - break; - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - return NestingDetected; - } - case IC_AutoreleasepoolPop: - // Conservatively, clear MyStates for all known pointers. - MyStates.clearBottomUpPointers(); - return NestingDetected; - case IC_AutoreleasepoolPush: - case IC_None: - // These are irrelevant. - return NestingDetected; - default: - break; - } - - // Consider any other possible effects of this instruction on each - // pointer being tracked. - for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(), - ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) { - const Value *Ptr = MI->first; - if (Ptr == Arg) - continue; // Handled above. - PtrState &S = MI->second; - Sequence Seq = S.GetSeq(); - - // Check for possible releases. - if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - S.ClearRefCount(); - switch (Seq) { - case S_Use: - S.SetSeq(S_CanRelease); - continue; - case S_CanRelease: - case S_Release: - case S_MovableRelease: - case S_Stop: - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - } - - // Check for possible direct uses. 
- switch (Seq) { - case S_Release: - case S_MovableRelease: - if (CanUse(Inst, Ptr, PA, Class)) { - assert(S.RRI.ReverseInsertPts.empty()); - // If this is an invoke instruction, we're scanning it as part of - // one of its successor blocks, since we can't insert code after it - // in its own block, and we don't want to split critical edges. - if (isa<InvokeInst>(Inst)) - S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt()); - else - S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst))); - S.SetSeq(S_Use); - } else if (Seq == S_Release && - (Class == IC_User || Class == IC_CallOrUser)) { - // Non-movable releases depend on any possible objc pointer use. - S.SetSeq(S_Stop); - assert(S.RRI.ReverseInsertPts.empty()); - // As above; handle invoke specially. - if (isa<InvokeInst>(Inst)) - S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt()); - else - S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst))); - } - break; - case S_Stop: - if (CanUse(Inst, Ptr, PA, Class)) - S.SetSeq(S_Use); - break; - case S_CanRelease: - case S_Use: - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - } - - return NestingDetected; -} - -bool -ObjCARCOpt::VisitBottomUp(BasicBlock *BB, - DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains) { - bool NestingDetected = false; - BBState &MyStates = BBStates[BB]; - - // Merge the states from each successor to compute the initial state - // for the current block. - BBState::edge_iterator SI(MyStates.succ_begin()), - SE(MyStates.succ_end()); - if (SI != SE) { - const BasicBlock *Succ = *SI; - DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ); - assert(I != BBStates.end()); - MyStates.InitFromSucc(I->second); - ++SI; - for (; SI != SE; ++SI) { - Succ = *SI; - I = BBStates.find(Succ); - assert(I != BBStates.end()); - MyStates.MergeSucc(I->second); - } - } - - // Visit all the instructions, bottom-up. - for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) { - Instruction *Inst = llvm::prior(I); - - // Invoke instructions are visited as part of their successors (below). - if (isa<InvokeInst>(Inst)) - continue; - - NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates); - } - - // If there's a predecessor with an invoke, visit the invoke as if it were - // part of this block, since we can't insert code after an invoke in its own - // block, and we don't want to split critical edges. - for (BBState::edge_iterator PI(MyStates.pred_begin()), - PE(MyStates.pred_end()); PI != PE; ++PI) { - BasicBlock *Pred = *PI; - if (InvokeInst *II = dyn_cast<InvokeInst>(&Pred->back())) - NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates); - } - - return NestingDetected; -} - -bool -ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, - DenseMap<Value *, RRInfo> &Releases, - BBState &MyStates) { - bool NestingDetected = false; - InstructionClass Class = GetInstructionClass(Inst); - const Value *Arg = 0; - - switch (Class) { - case IC_RetainBlock: - // An objc_retainBlock call with just a use may need to be kept, - // because it may be copying a block from the stack to the heap. - if (!IsRetainBlockOptimizable(Inst)) - break; - // FALLTHROUGH - case IC_Retain: - case IC_RetainRV: { - Arg = GetObjCArg(Inst); - - PtrState &S = MyStates.getPtrTopDownState(Arg); - - // Don't do retain+release tracking for IC_RetainRV, because it's - // better to let it remain as the first instruction after a call. 
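The bottom-up visitor tracks, per pointer, how far a release-to-retain sequence has progressed: a release starts a sequence, uses and potential reference-count decrements advance it, and a matching retain closes it as a removal candidate. A simplified, std-only sketch of that state machine (hypothetical names; the real pass distinguishes more states such as S_Stop and S_MovableRelease and records insertion points along the way):

// Minimal per-pointer sequence tracker for the bottom-up walk.
#include <cassert>

enum Sequence { S_None, S_Release, S_Use, S_CanRelease, S_Complete };

struct PtrState {
  Sequence Seq = S_None;
  void visitRelease()     { Seq = S_Release; }
  void visitUse()         { if (Seq == S_Release) Seq = S_Use; }
  void visitPossibleDec() { if (Seq == S_Use) Seq = S_CanRelease; }
  void visitRetain()      { if (Seq != S_None) Seq = S_Complete; }
};

int main() {
  PtrState S;
  S.visitRelease();   // saw objc_release(x) while walking bottom-up
  S.visitUse();       // something may read x
  S.visitRetain();    // matching objc_retain(x): candidate pair
  assert(S.Seq == S_Complete);
  return 0;
}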
- if (Class != IC_RetainRV) { - // If we see two retains in a row on the same pointer. If so, make - // a note, and we'll cicle back to revisit it after we've - // hopefully eliminated the second retain, which may allow us to - // eliminate the first retain too. - // Theoretically we could implement removal of nested retain+release - // pairs by making PtrState hold a stack of states, but this is - // simple and avoids adding overhead for the non-nested case. - if (S.GetSeq() == S_Retain) - NestingDetected = true; - - S.ResetSequenceProgress(S_Retain); - S.RRI.IsRetainBlock = Class == IC_RetainBlock; - S.RRI.KnownSafe = S.IsKnownIncremented(); - S.RRI.Calls.insert(Inst); - } - - S.SetKnownPositiveRefCount(); - - // A retain can be a potential use; procede to the generic checking - // code below. - break; - } - case IC_Release: { - Arg = GetObjCArg(Inst); - - PtrState &S = MyStates.getPtrTopDownState(Arg); - S.ClearRefCount(); - - switch (S.GetSeq()) { - case S_Retain: - case S_CanRelease: - S.RRI.ReverseInsertPts.clear(); - // FALL THROUGH - case S_Use: - S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); - S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); - Releases[Inst] = S.RRI; - S.ClearSequenceProgress(); - break; - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); - } - break; - } - case IC_AutoreleasepoolPop: - // Conservatively, clear MyStates for all known pointers. - MyStates.clearTopDownPointers(); - return NestingDetected; - case IC_AutoreleasepoolPush: - case IC_None: - // These are irrelevant. - return NestingDetected; - default: - break; - } - - // Consider any other possible effects of this instruction on each - // pointer being tracked. - for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(), - ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) { - const Value *Ptr = MI->first; - if (Ptr == Arg) - continue; // Handled above. - PtrState &S = MI->second; - Sequence Seq = S.GetSeq(); - - // Check for possible releases. - if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - S.ClearRefCount(); - switch (Seq) { - case S_Retain: - S.SetSeq(S_CanRelease); - assert(S.RRI.ReverseInsertPts.empty()); - S.RRI.ReverseInsertPts.insert(Inst); - - // One call can't cause a transition from S_Retain to S_CanRelease - // and S_CanRelease to S_Use. If we've made the first transition, - // we're done. - continue; - case S_Use: - case S_CanRelease: - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); - } - } - - // Check for possible direct uses. - switch (Seq) { - case S_CanRelease: - if (CanUse(Inst, Ptr, PA, Class)) - S.SetSeq(S_Use); - break; - case S_Retain: - case S_Use: - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); - } - } - - return NestingDetected; -} - -bool -ObjCARCOpt::VisitTopDown(BasicBlock *BB, - DenseMap<const BasicBlock *, BBState> &BBStates, - DenseMap<Value *, RRInfo> &Releases) { - bool NestingDetected = false; - BBState &MyStates = BBStates[BB]; - - // Merge the states from each predecessor to compute the initial state - // for the current block. 
- BBState::edge_iterator PI(MyStates.pred_begin()), - PE(MyStates.pred_end()); - if (PI != PE) { - const BasicBlock *Pred = *PI; - DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred); - assert(I != BBStates.end()); - MyStates.InitFromPred(I->second); - ++PI; - for (; PI != PE; ++PI) { - Pred = *PI; - I = BBStates.find(Pred); - assert(I != BBStates.end()); - MyStates.MergePred(I->second); - } - } - - // Visit all the instructions, top-down. - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - Instruction *Inst = I; - NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates); - } - - CheckForCFGHazards(BB, BBStates, MyStates); - return NestingDetected; -} - -static void -ComputePostOrders(Function &F, - SmallVectorImpl<BasicBlock *> &PostOrder, - SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder, - unsigned NoObjCARCExceptionsMDKind, - DenseMap<const BasicBlock *, BBState> &BBStates) { - /// Visited - The visited set, for doing DFS walks. - SmallPtrSet<BasicBlock *, 16> Visited; - - // Do DFS, computing the PostOrder. - SmallPtrSet<BasicBlock *, 16> OnStack; - SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack; - - // Functions always have exactly one entry block, and we don't have - // any other block that we treat like an entry block. - BasicBlock *EntryBB = &F.getEntryBlock(); - BBState &MyStates = BBStates[EntryBB]; - MyStates.SetAsEntry(); - TerminatorInst *EntryTI = cast<TerminatorInst>(&EntryBB->back()); - SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI))); - Visited.insert(EntryBB); - OnStack.insert(EntryBB); - do { - dfs_next_succ: - BasicBlock *CurrBB = SuccStack.back().first; - TerminatorInst *TI = cast<TerminatorInst>(&CurrBB->back()); - succ_iterator SE(TI, false); - - // If the terminator is an invoke marked with the - // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be - // ignored, for ARC purposes. - if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) - --SE; - - while (SuccStack.back().second != SE) { - BasicBlock *SuccBB = *SuccStack.back().second++; - if (Visited.insert(SuccBB)) { - TerminatorInst *TI = cast<TerminatorInst>(&SuccBB->back()); - SuccStack.push_back(std::make_pair(SuccBB, succ_iterator(TI))); - BBStates[CurrBB].addSucc(SuccBB); - BBState &SuccStates = BBStates[SuccBB]; - SuccStates.addPred(CurrBB); - OnStack.insert(SuccBB); - goto dfs_next_succ; - } - - if (!OnStack.count(SuccBB)) { - BBStates[CurrBB].addSucc(SuccBB); - BBStates[SuccBB].addPred(CurrBB); - } - } - OnStack.erase(CurrBB); - PostOrder.push_back(CurrBB); - SuccStack.pop_back(); - } while (!SuccStack.empty()); - - Visited.clear(); - - // Do reverse-CFG DFS, computing the reverse-CFG PostOrder. - // Functions may have many exits, and there also blocks which we treat - // as exits due to ignored edges. 
- SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack; - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { - BasicBlock *ExitBB = I; - BBState &MyStates = BBStates[ExitBB]; - if (!MyStates.isExit()) - continue; - - MyStates.SetAsExit(); - - PredStack.push_back(std::make_pair(ExitBB, MyStates.pred_begin())); - Visited.insert(ExitBB); - while (!PredStack.empty()) { - reverse_dfs_next_succ: - BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end(); - while (PredStack.back().second != PE) { - BasicBlock *BB = *PredStack.back().second++; - if (Visited.insert(BB)) { - PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin())); - goto reverse_dfs_next_succ; - } - } - ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first); - } - } -} - -// Visit - Visit the function both top-down and bottom-up. -bool -ObjCARCOpt::Visit(Function &F, - DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases) { - - // Use reverse-postorder traversals, because we magically know that loops - // will be well behaved, i.e. they won't repeatedly call retain on a single - // pointer without doing a release. We can't use the ReversePostOrderTraversal - // class here because we want the reverse-CFG postorder to consider each - // function exit point, and we want to ignore selected cycle edges. - SmallVector<BasicBlock *, 16> PostOrder; - SmallVector<BasicBlock *, 16> ReverseCFGPostOrder; - ComputePostOrders(F, PostOrder, ReverseCFGPostOrder, - NoObjCARCExceptionsMDKind, - BBStates); - - // Use reverse-postorder on the reverse CFG for bottom-up. - bool BottomUpNestingDetected = false; - for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I = - ReverseCFGPostOrder.rbegin(), E = ReverseCFGPostOrder.rend(); - I != E; ++I) - BottomUpNestingDetected |= VisitBottomUp(*I, BBStates, Retains); - - // Use reverse-postorder for top-down. - bool TopDownNestingDetected = false; - for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I = - PostOrder.rbegin(), E = PostOrder.rend(); - I != E; ++I) - TopDownNestingDetected |= VisitTopDown(*I, BBStates, Releases); - - return TopDownNestingDetected && BottomUpNestingDetected; -} - -/// MoveCalls - Move the calls in RetainsToMove and ReleasesToMove. -void ObjCARCOpt::MoveCalls(Value *Arg, - RRInfo &RetainsToMove, - RRInfo &ReleasesToMove, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases, - SmallVectorImpl<Instruction *> &DeadInsts, - Module *M) { - Type *ArgTy = Arg->getType(); - Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext())); - - // Insert the new retain and release calls. - for (SmallPtrSet<Instruction *, 2>::const_iterator - PI = ReleasesToMove.ReverseInsertPts.begin(), - PE = ReleasesToMove.ReverseInsertPts.end(); PI != PE; ++PI) { - Instruction *InsertPt = *PI; - Value *MyArg = ArgTy == ParamTy ? Arg : - new BitCastInst(Arg, ParamTy, "", InsertPt); - CallInst *Call = - CallInst::Create(RetainsToMove.IsRetainBlock ? 
- getRetainBlockCallee(M) : getRetainCallee(M), - MyArg, "", InsertPt); - Call->setDoesNotThrow(); - if (RetainsToMove.IsRetainBlock) - Call->setMetadata(CopyOnEscapeMDKind, - MDNode::get(M->getContext(), ArrayRef<Value *>())); - else - Call->setTailCall(); - } - for (SmallPtrSet<Instruction *, 2>::const_iterator - PI = RetainsToMove.ReverseInsertPts.begin(), - PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) { - Instruction *InsertPt = *PI; - Value *MyArg = ArgTy == ParamTy ? Arg : - new BitCastInst(Arg, ParamTy, "", InsertPt); - CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg, - "", InsertPt); - // Attach a clang.imprecise_release metadata tag, if appropriate. - if (MDNode *M = ReleasesToMove.ReleaseMetadata) - Call->setMetadata(ImpreciseReleaseMDKind, M); - Call->setDoesNotThrow(); - if (ReleasesToMove.IsTailCallRelease) - Call->setTailCall(); - } - - // Delete the original retain and release calls. - for (SmallPtrSet<Instruction *, 2>::const_iterator - AI = RetainsToMove.Calls.begin(), - AE = RetainsToMove.Calls.end(); AI != AE; ++AI) { - Instruction *OrigRetain = *AI; - Retains.blot(OrigRetain); - DeadInsts.push_back(OrigRetain); - } - for (SmallPtrSet<Instruction *, 2>::const_iterator - AI = ReleasesToMove.Calls.begin(), - AE = ReleasesToMove.Calls.end(); AI != AE; ++AI) { - Instruction *OrigRelease = *AI; - Releases.erase(OrigRelease); - DeadInsts.push_back(OrigRelease); - } -} - -/// PerformCodePlacement - Identify pairings between the retains and releases, -/// and delete and/or move them. -bool -ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> - &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases, - Module *M) { - bool AnyPairsCompletelyEliminated = false; - RRInfo RetainsToMove; - RRInfo ReleasesToMove; - SmallVector<Instruction *, 4> NewRetains; - SmallVector<Instruction *, 4> NewReleases; - SmallVector<Instruction *, 8> DeadInsts; - - // Visit each retain. - for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(), - E = Retains.end(); I != E; ++I) { - Value *V = I->first; - if (!V) continue; // blotted - - Instruction *Retain = cast<Instruction>(V); - Value *Arg = GetObjCArg(Retain); - - // If the object being released is in static or stack storage, we know it's - // not being managed by ObjC reference counting, so we can delete pairs - // regardless of what possible decrements or uses lie between them. - bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg); - - // A constant pointer can't be pointing to an object on the heap. It may - // be reference-counted, but it won't be deleted. - if (const LoadInst *LI = dyn_cast<LoadInst>(Arg)) - if (const GlobalVariable *GV = - dyn_cast<GlobalVariable>( - StripPointerCastsAndObjCCalls(LI->getPointerOperand()))) - if (GV->isConstant()) - KnownSafe = true; - - // If a pair happens in a region where it is known that the reference count - // is already incremented, we can similarly ignore possible decrements. - bool KnownSafeTD = true, KnownSafeBU = true; - - // Connect the dots between the top-down-collected RetainsToMove and - // bottom-up-collected ReleasesToMove to form sets of related calls. - // This is an iterative process so that we connect multiple releases - // to multiple retains if needed. 
- unsigned OldDelta = 0; - unsigned NewDelta = 0; - unsigned OldCount = 0; - unsigned NewCount = 0; - bool FirstRelease = true; - bool FirstRetain = true; - NewRetains.push_back(Retain); - for (;;) { - for (SmallVectorImpl<Instruction *>::const_iterator - NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) { - Instruction *NewRetain = *NI; - MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain); - assert(It != Retains.end()); - const RRInfo &NewRetainRRI = It->second; - KnownSafeTD &= NewRetainRRI.KnownSafe; - for (SmallPtrSet<Instruction *, 2>::const_iterator - LI = NewRetainRRI.Calls.begin(), - LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) { - Instruction *NewRetainRelease = *LI; - DenseMap<Value *, RRInfo>::const_iterator Jt = - Releases.find(NewRetainRelease); - if (Jt == Releases.end()) - goto next_retain; - const RRInfo &NewRetainReleaseRRI = Jt->second; - assert(NewRetainReleaseRRI.Calls.count(NewRetain)); - if (ReleasesToMove.Calls.insert(NewRetainRelease)) { - OldDelta -= - BBStates[NewRetainRelease->getParent()].GetAllPathCount(); - - // Merge the ReleaseMetadata and IsTailCallRelease values. - if (FirstRelease) { - ReleasesToMove.ReleaseMetadata = - NewRetainReleaseRRI.ReleaseMetadata; - ReleasesToMove.IsTailCallRelease = - NewRetainReleaseRRI.IsTailCallRelease; - FirstRelease = false; - } else { - if (ReleasesToMove.ReleaseMetadata != - NewRetainReleaseRRI.ReleaseMetadata) - ReleasesToMove.ReleaseMetadata = 0; - if (ReleasesToMove.IsTailCallRelease != - NewRetainReleaseRRI.IsTailCallRelease) - ReleasesToMove.IsTailCallRelease = false; - } - - // Collect the optimal insertion points. - if (!KnownSafe) - for (SmallPtrSet<Instruction *, 2>::const_iterator - RI = NewRetainReleaseRRI.ReverseInsertPts.begin(), - RE = NewRetainReleaseRRI.ReverseInsertPts.end(); - RI != RE; ++RI) { - Instruction *RIP = *RI; - if (ReleasesToMove.ReverseInsertPts.insert(RIP)) - NewDelta -= BBStates[RIP->getParent()].GetAllPathCount(); - } - NewReleases.push_back(NewRetainRelease); - } - } - } - NewRetains.clear(); - if (NewReleases.empty()) break; - - // Back the other way. - for (SmallVectorImpl<Instruction *>::const_iterator - NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) { - Instruction *NewRelease = *NI; - DenseMap<Value *, RRInfo>::const_iterator It = - Releases.find(NewRelease); - assert(It != Releases.end()); - const RRInfo &NewReleaseRRI = It->second; - KnownSafeBU &= NewReleaseRRI.KnownSafe; - for (SmallPtrSet<Instruction *, 2>::const_iterator - LI = NewReleaseRRI.Calls.begin(), - LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) { - Instruction *NewReleaseRetain = *LI; - MapVector<Value *, RRInfo>::const_iterator Jt = - Retains.find(NewReleaseRetain); - if (Jt == Retains.end()) - goto next_retain; - const RRInfo &NewReleaseRetainRRI = Jt->second; - assert(NewReleaseRetainRRI.Calls.count(NewRelease)); - if (RetainsToMove.Calls.insert(NewReleaseRetain)) { - unsigned PathCount = - BBStates[NewReleaseRetain->getParent()].GetAllPathCount(); - OldDelta += PathCount; - OldCount += PathCount; - - // Merge the IsRetainBlock values. - if (FirstRetain) { - RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock; - FirstRetain = false; - } else if (ReleasesToMove.IsRetainBlock != - NewReleaseRetainRRI.IsRetainBlock) - // It's not possible to merge the sequences if one uses - // objc_retain and the other uses objc_retainBlock. - goto next_retain; - - // Collect the optimal insertion points. 
- if (!KnownSafe) - for (SmallPtrSet<Instruction *, 2>::const_iterator - RI = NewReleaseRetainRRI.ReverseInsertPts.begin(), - RE = NewReleaseRetainRRI.ReverseInsertPts.end(); - RI != RE; ++RI) { - Instruction *RIP = *RI; - if (RetainsToMove.ReverseInsertPts.insert(RIP)) { - PathCount = BBStates[RIP->getParent()].GetAllPathCount(); - NewDelta += PathCount; - NewCount += PathCount; - } - } - NewRetains.push_back(NewReleaseRetain); - } - } - } - NewReleases.clear(); - if (NewRetains.empty()) break; - } - - // If the pointer is known incremented or nested, we can safely delete the - // pair regardless of what's between them. - if (KnownSafeTD || KnownSafeBU) { - RetainsToMove.ReverseInsertPts.clear(); - ReleasesToMove.ReverseInsertPts.clear(); - NewCount = 0; - } else { - // Determine whether the new insertion points we computed preserve the - // balance of retain and release calls through the program. - // TODO: If the fully aggressive solution isn't valid, try to find a - // less aggressive solution which is. - if (NewDelta != 0) - goto next_retain; - } - - // Determine whether the original call points are balanced in the retain and - // release calls through the program. If not, conservatively don't touch - // them. - // TODO: It's theoretically possible to do code motion in this case, as - // long as the existing imbalances are maintained. - if (OldDelta != 0) - goto next_retain; - - // Ok, everything checks out and we're all set. Let's move some code! - Changed = true; - assert(OldCount != 0 && "Unreachable code?"); - AnyPairsCompletelyEliminated = NewCount == 0; - NumRRs += OldCount - NewCount; - MoveCalls(Arg, RetainsToMove, ReleasesToMove, - Retains, Releases, DeadInsts, M); - - next_retain: - NewReleases.clear(); - NewRetains.clear(); - RetainsToMove.clear(); - ReleasesToMove.clear(); - } - - // Now that we're done moving everything, we can delete the newly dead - // instructions, as we no longer need them as insert points. - while (!DeadInsts.empty()) - EraseInstruction(DeadInsts.pop_back_val()); - - return AnyPairsCompletelyEliminated; -} - -/// OptimizeWeakCalls - Weak pointer optimizations. -void ObjCARCOpt::OptimizeWeakCalls(Function &F) { - // First, do memdep-style RLE and S2L optimizations. We can't use memdep - // itself because it uses AliasAnalysis and we need to do provenance - // queries instead. - for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { - Instruction *Inst = &*I++; - InstructionClass Class = GetBasicInstructionClass(Inst); - if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained) - continue; - - // Delete objc_loadWeak calls with no users. - if (Class == IC_LoadWeak && Inst->use_empty()) { - Inst->eraseFromParent(); - continue; - } - - // TODO: For now, just look for an earlier available version of this value - // within the same block. Theoretically, we could do memdep-style non-local - // analysis too, but that would want caching. A better approach would be to - // use the technique that EarlyCSE uses. - inst_iterator Current = llvm::prior(I); - BasicBlock *CurrentBB = Current.getBasicBlockIterator(); - for (BasicBlock::iterator B = CurrentBB->begin(), - J = Current.getInstructionIterator(); - J != B; --J) { - Instruction *EarlierInst = &*llvm::prior(J); - InstructionClass EarlierClass = GetInstructionClass(EarlierInst); - switch (EarlierClass) { - case IC_LoadWeak: - case IC_LoadWeakRetained: { - // If this is loading from the same pointer, replace this load's value - // with that one. 
- CallInst *Call = cast<CallInst>(Inst); - CallInst *EarlierCall = cast<CallInst>(EarlierInst); - Value *Arg = Call->getArgOperand(0); - Value *EarlierArg = EarlierCall->getArgOperand(0); - switch (PA.getAA()->alias(Arg, EarlierArg)) { - case AliasAnalysis::MustAlias: - Changed = true; - // If the load has a builtin retain, insert a plain retain for it. - if (Class == IC_LoadWeakRetained) { - CallInst *CI = - CallInst::Create(getRetainCallee(F.getParent()), EarlierCall, - "", Call); - CI->setTailCall(); - } - // Zap the fully redundant load. - Call->replaceAllUsesWith(EarlierCall); - Call->eraseFromParent(); - goto clobbered; - case AliasAnalysis::MayAlias: - case AliasAnalysis::PartialAlias: - goto clobbered; - case AliasAnalysis::NoAlias: - break; - } - break; - } - case IC_StoreWeak: - case IC_InitWeak: { - // If this is storing to the same pointer and has the same size etc. - // replace this load's value with the stored value. - CallInst *Call = cast<CallInst>(Inst); - CallInst *EarlierCall = cast<CallInst>(EarlierInst); - Value *Arg = Call->getArgOperand(0); - Value *EarlierArg = EarlierCall->getArgOperand(0); - switch (PA.getAA()->alias(Arg, EarlierArg)) { - case AliasAnalysis::MustAlias: - Changed = true; - // If the load has a builtin retain, insert a plain retain for it. - if (Class == IC_LoadWeakRetained) { - CallInst *CI = - CallInst::Create(getRetainCallee(F.getParent()), EarlierCall, - "", Call); - CI->setTailCall(); - } - // Zap the fully redundant load. - Call->replaceAllUsesWith(EarlierCall->getArgOperand(1)); - Call->eraseFromParent(); - goto clobbered; - case AliasAnalysis::MayAlias: - case AliasAnalysis::PartialAlias: - goto clobbered; - case AliasAnalysis::NoAlias: - break; - } - break; - } - case IC_MoveWeak: - case IC_CopyWeak: - // TOOD: Grab the copied value. - goto clobbered; - case IC_AutoreleasepoolPush: - case IC_None: - case IC_User: - // Weak pointers are only modified through the weak entry points - // (and arbitrary calls, which could call the weak entry points). - break; - default: - // Anything else could modify the weak pointer. - goto clobbered; - } - } - clobbered:; - } - - // Then, for each destroyWeak with an alloca operand, check to see if - // the alloca and all its users can be zapped. - for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { - Instruction *Inst = &*I++; - InstructionClass Class = GetBasicInstructionClass(Inst); - if (Class != IC_DestroyWeak) - continue; - - CallInst *Call = cast<CallInst>(Inst); - Value *Arg = Call->getArgOperand(0); - if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) { - for (Value::use_iterator UI = Alloca->use_begin(), - UE = Alloca->use_end(); UI != UE; ++UI) { - const Instruction *UserInst = cast<Instruction>(*UI); - switch (GetBasicInstructionClass(UserInst)) { - case IC_InitWeak: - case IC_StoreWeak: - case IC_DestroyWeak: - continue; - default: - goto done; - } - } - Changed = true; - for (Value::use_iterator UI = Alloca->use_begin(), - UE = Alloca->use_end(); UI != UE; ) { - CallInst *UserInst = cast<CallInst>(*UI++); - switch (GetBasicInstructionClass(UserInst)) { - case IC_InitWeak: - case IC_StoreWeak: - // These functions return their second argument. - UserInst->replaceAllUsesWith(UserInst->getArgOperand(1)); - break; - case IC_DestroyWeak: - // No return value. 
- break; - default: - llvm_unreachable("alloca really is used!"); - } - UserInst->eraseFromParent(); - } - Alloca->eraseFromParent(); - done:; - } - } -} - -/// OptimizeSequences - Identify program paths which execute sequences of -/// retains and releases which can be eliminated. -bool ObjCARCOpt::OptimizeSequences(Function &F) { - /// Releases, Retains - These are used to store the results of the main flow - /// analysis. These use Value* as the key instead of Instruction* so that the - /// map stays valid when we get around to rewriting code and calls get - /// replaced by arguments. - DenseMap<Value *, RRInfo> Releases; - MapVector<Value *, RRInfo> Retains; - - /// BBStates, This is used during the traversal of the function to track the - /// states for each identified object at each block. - DenseMap<const BasicBlock *, BBState> BBStates; - - // Analyze the CFG of the function, and all instructions. - bool NestingDetected = Visit(F, BBStates, Retains, Releases); - - // Transform. - return PerformCodePlacement(BBStates, Retains, Releases, F.getParent()) && - NestingDetected; -} - -/// OptimizeReturns - Look for this pattern: -/// \code -/// %call = call i8* @something(...) -/// %2 = call i8* @objc_retain(i8* %call) -/// %3 = call i8* @objc_autorelease(i8* %2) -/// ret i8* %3 -/// \endcode -/// And delete the retain and autorelease. -/// -/// Otherwise if it's just this: -/// \code -/// %3 = call i8* @objc_autorelease(i8* %2) -/// ret i8* %3 -/// \endcode -/// convert the autorelease to autoreleaseRV. -void ObjCARCOpt::OptimizeReturns(Function &F) { - if (!F.getReturnType()->isPointerTy()) - return; - - SmallPtrSet<Instruction *, 4> DependingInstructions; - SmallPtrSet<const BasicBlock *, 4> Visited; - for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { - BasicBlock *BB = FI; - ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back()); - if (!Ret) continue; - - const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0)); - FindDependencies(NeedsPositiveRetainCount, Arg, - BB, Ret, DependingInstructions, Visited, PA); - if (DependingInstructions.size() != 1) - goto next_block; - - { - CallInst *Autorelease = - dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); - if (!Autorelease) - goto next_block; - InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease); - if (!IsAutorelease(AutoreleaseClass)) - goto next_block; - if (GetObjCArg(Autorelease) != Arg) - goto next_block; - - DependingInstructions.clear(); - Visited.clear(); - - // Check that there is nothing that can affect the reference - // count between the autorelease and the retain. - FindDependencies(CanChangeRetainCount, Arg, - BB, Autorelease, DependingInstructions, Visited, PA); - if (DependingInstructions.size() != 1) - goto next_block; - - { - CallInst *Retain = - dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); - - // Check that we found a retain with the same argument. - if (!Retain || - !IsRetain(GetBasicInstructionClass(Retain)) || - GetObjCArg(Retain) != Arg) - goto next_block; - - DependingInstructions.clear(); - Visited.clear(); - - // Convert the autorelease to an autoreleaseRV, since it's - // returning the value. - if (AutoreleaseClass == IC_Autorelease) { - Autorelease->setCalledFunction(getAutoreleaseRVCallee(F.getParent())); - AutoreleaseClass = IC_AutoreleaseRV; - } - - // Check that there is nothing that can affect the reference - // count between the retain and the call. - // Note that Retain need not be in BB. 
- FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain, - DependingInstructions, Visited, PA); - if (DependingInstructions.size() != 1) - goto next_block; - - { - CallInst *Call = - dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); - - // Check that the pointer is the return value of the call. - if (!Call || Arg != Call) - goto next_block; - - // Check that the call is a regular call. - InstructionClass Class = GetBasicInstructionClass(Call); - if (Class != IC_CallOrUser && Class != IC_Call) - goto next_block; - - // If so, we can zap the retain and autorelease. - Changed = true; - ++NumRets; - EraseInstruction(Retain); - EraseInstruction(Autorelease); - } - } - } - - next_block: - DependingInstructions.clear(); - Visited.clear(); - } -} - -bool ObjCARCOpt::doInitialization(Module &M) { - if (!EnableARCOpts) - return false; - - // If nothing in the Module uses ARC, don't do anything. - Run = ModuleHasARC(M); - if (!Run) - return false; - - // Identify the imprecise release metadata kind. - ImpreciseReleaseMDKind = - M.getContext().getMDKindID("clang.imprecise_release"); - CopyOnEscapeMDKind = - M.getContext().getMDKindID("clang.arc.copy_on_escape"); - NoObjCARCExceptionsMDKind = - M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions"); - - // Intuitively, objc_retain and others are nocapture, however in practice - // they are not, because they return their argument value. And objc_release - // calls finalizers which can have arbitrary side effects. - - // These are initialized lazily. - RetainRVCallee = 0; - AutoreleaseRVCallee = 0; - ReleaseCallee = 0; - RetainCallee = 0; - RetainBlockCallee = 0; - AutoreleaseCallee = 0; - - return false; -} - -bool ObjCARCOpt::runOnFunction(Function &F) { - if (!EnableARCOpts) - return false; - - // If nothing in the Module uses ARC, don't do anything. - if (!Run) - return false; - - Changed = false; - - PA.setAA(&getAnalysis<AliasAnalysis>()); - - // This pass performs several distinct transformations. As a compile-time aid - // when compiling code that isn't ObjC, skip these if the relevant ObjC - // library functions aren't declared. - - // Preliminary optimizations. This also computs UsedInThisFunction. - OptimizeIndividualCalls(F); - - // Optimizations for weak pointers. - if (UsedInThisFunction & ((1 << IC_LoadWeak) | - (1 << IC_LoadWeakRetained) | - (1 << IC_StoreWeak) | - (1 << IC_InitWeak) | - (1 << IC_CopyWeak) | - (1 << IC_MoveWeak) | - (1 << IC_DestroyWeak))) - OptimizeWeakCalls(F); - - // Optimizations for retain+release pairs. - if (UsedInThisFunction & ((1 << IC_Retain) | - (1 << IC_RetainRV) | - (1 << IC_RetainBlock))) - if (UsedInThisFunction & (1 << IC_Release)) - // Run OptimizeSequences until it either stops making changes or - // no retain+release pair nesting is detected. - while (OptimizeSequences(F)) {} - - // Optimizations if objc_autorelease is used. - if (UsedInThisFunction & ((1 << IC_Autorelease) | - (1 << IC_AutoreleaseRV))) - OptimizeReturns(F); - - return Changed; -} - -void ObjCARCOpt::releaseMemory() { - PA.clear(); -} - -//===----------------------------------------------------------------------===// -// ARC contraction. -//===----------------------------------------------------------------------===// - -// TODO: ObjCARCContract could insert PHI nodes when uses aren't -// dominated by single calls. 
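runOnFunction above gates each later phase on a bit mask of the ARC entry points actually seen while visiting individual calls, so functions that never touch weak references or autorelease skip those phases entirely. A self-contained sketch of that gating, with hypothetical names:

// Record which kinds of ARC calls were seen, then skip whole sub-passes
// whose relevant calls never occur in the function.
#include <cstdio>

enum CallKind { CK_Retain, CK_Release, CK_Autorelease, CK_LoadWeak, CK_NumKinds };

struct FunctionSummary {
  unsigned UsedKinds = 0;
  void note(CallKind K) { UsedKinds |= 1u << K; }
  bool any(unsigned Mask) const { return (UsedKinds & Mask) != 0; }
};

int main() {
  FunctionSummary FS;
  FS.note(CK_Retain);
  FS.note(CK_Release);

  if (FS.any(1u << CK_LoadWeak))
    std::puts("run weak-pointer optimizations");
  if (FS.any(1u << CK_Retain) && FS.any(1u << CK_Release))
    std::puts("run retain/release pairing until fixpoint");
  if (FS.any(1u << CK_Autorelease))
    std::puts("run return-value optimizations");
  return 0;
}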
- -#include "llvm/Analysis/Dominators.h" -#include "llvm/InlineAsm.h" -#include "llvm/Operator.h" - -STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed"); - -namespace { - /// ObjCARCContract - Late ARC optimizations. These change the IR in a way - /// that makes it difficult to be analyzed by ObjCARCOpt, so it's run late. - class ObjCARCContract : public FunctionPass { - bool Changed; - AliasAnalysis *AA; - DominatorTree *DT; - ProvenanceAnalysis PA; - - /// Run - A flag indicating whether this optimization pass should run. - bool Run; - - /// StoreStrongCallee, etc. - Declarations for ObjC runtime - /// functions, for use in creating calls to them. These are initialized - /// lazily to avoid cluttering up the Module with unused declarations. - Constant *StoreStrongCallee, - *RetainAutoreleaseCallee, *RetainAutoreleaseRVCallee; - - /// RetainRVMarker - The inline asm string to insert between calls and - /// RetainRV calls to make the optimization work on targets which need it. - const MDString *RetainRVMarker; - - /// StoreStrongCalls - The set of inserted objc_storeStrong calls. If - /// at the end of walking the function we have found no alloca - /// instructions, these calls can be marked "tail". - SmallPtrSet<CallInst *, 8> StoreStrongCalls; - - Constant *getStoreStrongCallee(Module *M); - Constant *getRetainAutoreleaseCallee(Module *M); - Constant *getRetainAutoreleaseRVCallee(Module *M); - - bool ContractAutorelease(Function &F, Instruction *Autorelease, - InstructionClass Class, - SmallPtrSet<Instruction *, 4> - &DependingInstructions, - SmallPtrSet<const BasicBlock *, 4> - &Visited); - - void ContractRelease(Instruction *Release, - inst_iterator &Iter); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool doInitialization(Module &M); - virtual bool runOnFunction(Function &F); - - public: - static char ID; - ObjCARCContract() : FunctionPass(ID) { - initializeObjCARCContractPass(*PassRegistry::getPassRegistry()); - } - }; -} - -char ObjCARCContract::ID = 0; -INITIALIZE_PASS_BEGIN(ObjCARCContract, - "objc-arc-contract", "ObjC ARC contraction", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) -INITIALIZE_PASS_END(ObjCARCContract, - "objc-arc-contract", "ObjC ARC contraction", false, false) - -Pass *llvm::createObjCARCContractPass() { - return new ObjCARCContract(); -} - -void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<AliasAnalysis>(); - AU.addRequired<DominatorTree>(); - AU.setPreservesCFG(); -} - -Constant *ObjCARCContract::getStoreStrongCallee(Module *M) { - if (!StoreStrongCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *I8XX = PointerType::getUnqual(I8X); - Type *Params[] = { I8XX, I8X }; - - AttributeSet Attributes = AttributeSet() - .addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attributes::get(C, Attributes::NoUnwind)) - .addAttr(M->getContext(), 1, Attributes::get(C, Attributes::NoCapture)); - - StoreStrongCallee = - M->getOrInsertFunction( - "objc_storeStrong", - FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false), - Attributes); - } - return StoreStrongCallee; -} - -Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) { - if (!RetainAutoreleaseCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *Params[] = { I8X }; - FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - 
AttributeSet Attributes = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attributes::get(C, Attributes::NoUnwind)); - RetainAutoreleaseCallee = - M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes); - } - return RetainAutoreleaseCallee; -} - -Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { - if (!RetainAutoreleaseRVCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *Params[] = { I8X }; - FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttributeSet Attributes = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attributes::get(C, Attributes::NoUnwind)); - RetainAutoreleaseRVCallee = - M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy, - Attributes); - } - return RetainAutoreleaseRVCallee; -} - -/// ContractAutorelease - Merge an autorelease with a retain into a fused call. -bool -ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease, - InstructionClass Class, - SmallPtrSet<Instruction *, 4> - &DependingInstructions, - SmallPtrSet<const BasicBlock *, 4> - &Visited) { - const Value *Arg = GetObjCArg(Autorelease); - - // Check that there are no instructions between the retain and the autorelease - // (such as an autorelease_pop) which may change the count. - CallInst *Retain = 0; - if (Class == IC_AutoreleaseRV) - FindDependencies(RetainAutoreleaseRVDep, Arg, - Autorelease->getParent(), Autorelease, - DependingInstructions, Visited, PA); - else - FindDependencies(RetainAutoreleaseDep, Arg, - Autorelease->getParent(), Autorelease, - DependingInstructions, Visited, PA); - - Visited.clear(); - if (DependingInstructions.size() != 1) { - DependingInstructions.clear(); - return false; - } - - Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); - DependingInstructions.clear(); - - if (!Retain || - GetBasicInstructionClass(Retain) != IC_Retain || - GetObjCArg(Retain) != Arg) - return false; - - Changed = true; - ++NumPeeps; - - if (Class == IC_AutoreleaseRV) - Retain->setCalledFunction(getRetainAutoreleaseRVCallee(F.getParent())); - else - Retain->setCalledFunction(getRetainAutoreleaseCallee(F.getParent())); - - EraseInstruction(Autorelease); - return true; -} - -/// ContractRelease - Attempt to merge an objc_release with a store, load, and -/// objc_retain to form an objc_storeStrong. This can be a little tricky because -/// the instructions don't always appear in order, and there may be unrelated -/// intervening instructions. -void ObjCARCContract::ContractRelease(Instruction *Release, - inst_iterator &Iter) { - LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release)); - if (!Load || !Load->isSimple()) return; - - // For now, require everything to be in one basic block. - BasicBlock *BB = Release->getParent(); - if (Load->getParent() != BB) return; - - // Walk down to find the store and the release, which may be in either order. - BasicBlock::iterator I = Load, End = BB->end(); - ++I; - AliasAnalysis::Location Loc = AA->getLocation(Load); - StoreInst *Store = 0; - bool SawRelease = false; - for (; !Store || !SawRelease; ++I) { - if (I == End) - return; - - Instruction *Inst = I; - if (Inst == Release) { - SawRelease = true; - continue; - } - - InstructionClass Class = GetBasicInstructionClass(Inst); - - // Unrelated retains are harmless. 
- if (IsRetain(Class)) - continue; - - if (Store) { - // The store is the point where we're going to put the objc_storeStrong, - // so make sure there are no uses after it. - if (CanUse(Inst, Load, PA, Class)) - return; - } else if (AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod) { - // We are moving the load down to the store, so check for anything - // else which writes to the memory between the load and the store. - Store = dyn_cast<StoreInst>(Inst); - if (!Store || !Store->isSimple()) return; - if (Store->getPointerOperand() != Loc.Ptr) return; - } - } - - Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand()); - - // Walk up to find the retain. - I = Store; - BasicBlock::iterator Begin = BB->begin(); - while (I != Begin && GetBasicInstructionClass(I) != IC_Retain) - --I; - Instruction *Retain = I; - if (GetBasicInstructionClass(Retain) != IC_Retain) return; - if (GetObjCArg(Retain) != New) return; - - Changed = true; - ++NumStoreStrongs; - - LLVMContext &C = Release->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *I8XX = PointerType::getUnqual(I8X); - - Value *Args[] = { Load->getPointerOperand(), New }; - if (Args[0]->getType() != I8XX) - Args[0] = new BitCastInst(Args[0], I8XX, "", Store); - if (Args[1]->getType() != I8X) - Args[1] = new BitCastInst(Args[1], I8X, "", Store); - CallInst *StoreStrong = - CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()), - Args, "", Store); - StoreStrong->setDoesNotThrow(); - StoreStrong->setDebugLoc(Store->getDebugLoc()); - - // We can't set the tail flag yet, because we haven't yet determined - // whether there are any escaping allocas. Remember this call, so that - // we can set the tail flag once we know it's safe. - StoreStrongCalls.insert(StoreStrong); - - if (&*Iter == Store) ++Iter; - Store->eraseFromParent(); - Release->eraseFromParent(); - EraseInstruction(Retain); - if (Load->use_empty()) - Load->eraseFromParent(); -} - -bool ObjCARCContract::doInitialization(Module &M) { - // If nothing in the Module uses ARC, don't do anything. - Run = ModuleHasARC(M); - if (!Run) - return false; - - // These are initialized lazily. - StoreStrongCallee = 0; - RetainAutoreleaseCallee = 0; - RetainAutoreleaseRVCallee = 0; - - // Initialize RetainRVMarker. - RetainRVMarker = 0; - if (NamedMDNode *NMD = - M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker")) - if (NMD->getNumOperands() == 1) { - const MDNode *N = NMD->getOperand(0); - if (N->getNumOperands() == 1) - if (const MDString *S = dyn_cast<MDString>(N->getOperand(0))) - RetainRVMarker = S; - } - - return false; -} - -bool ObjCARCContract::runOnFunction(Function &F) { - if (!EnableARCOpts) - return false; - - // If nothing in the Module uses ARC, don't do anything. - if (!Run) - return false; - - Changed = false; - AA = &getAnalysis<AliasAnalysis>(); - DT = &getAnalysis<DominatorTree>(); - - PA.setAA(&getAnalysis<AliasAnalysis>()); - - // Track whether it's ok to mark objc_storeStrong calls with the "tail" - // keyword. Be conservative if the function has variadic arguments. - // It seems that functions which "return twice" are also unsafe for the - // "tail" argument, because they are setjmp, which could need to - // return to an earlier stack state. - bool TailOkForStoreStrongs = !F.isVarArg() && - !F.callsFunctionThatReturnsTwice(); - - // For ObjC library calls which return their argument, replace uses of the - // argument with uses of the call return value, if it dominates the use. 
This - // reduces register pressure. - SmallPtrSet<Instruction *, 4> DependingInstructions; - SmallPtrSet<const BasicBlock *, 4> Visited; - for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { - Instruction *Inst = &*I++; - - // Only these library routines return their argument. In particular, - // objc_retainBlock does not necessarily return its argument. - InstructionClass Class = GetBasicInstructionClass(Inst); - switch (Class) { - case IC_Retain: - case IC_FusedRetainAutorelease: - case IC_FusedRetainAutoreleaseRV: - break; - case IC_Autorelease: - case IC_AutoreleaseRV: - if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited)) - continue; - break; - case IC_RetainRV: { - // If we're compiling for a target which needs a special inline-asm - // marker to do the retainAutoreleasedReturnValue optimization, - // insert it now. - if (!RetainRVMarker) - break; - BasicBlock::iterator BBI = Inst; - BasicBlock *InstParent = Inst->getParent(); - - // Step up to see if the call immediately precedes the RetainRV call. - // If it's an invoke, we have to cross a block boundary. And we have - // to carefully dodge no-op instructions. - do { - if (&*BBI == InstParent->begin()) { - BasicBlock *Pred = InstParent->getSinglePredecessor(); - if (!Pred) - goto decline_rv_optimization; - BBI = Pred->getTerminator(); - break; - } - --BBI; - } while (isNoopInstruction(BBI)); - - if (&*BBI == GetObjCArg(Inst)) { - Changed = true; - InlineAsm *IA = - InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()), - /*isVarArg=*/false), - RetainRVMarker->getString(), - /*Constraints=*/"", /*hasSideEffects=*/true); - CallInst::Create(IA, "", Inst); - } - decline_rv_optimization: - break; - } - case IC_InitWeak: { - // objc_initWeak(p, null) => *p = null - CallInst *CI = cast<CallInst>(Inst); - if (isNullOrUndef(CI->getArgOperand(1))) { - Value *Null = - ConstantPointerNull::get(cast<PointerType>(CI->getType())); - Changed = true; - new StoreInst(Null, CI->getArgOperand(0), CI); - CI->replaceAllUsesWith(Null); - CI->eraseFromParent(); - } - continue; - } - case IC_Release: - ContractRelease(Inst, I); - continue; - case IC_User: - // Be conservative if the function has any alloca instructions. - // Technically we only care about escaping alloca instructions, - // but this is sufficient to handle some interesting cases. - if (isa<AllocaInst>(Inst)) - TailOkForStoreStrongs = false; - continue; - default: - continue; - } - - // Don't use GetObjCArg because we don't want to look through bitcasts - // and such; to do the replacement, the argument must have type i8*. - const Value *Arg = cast<CallInst>(Inst)->getArgOperand(0); - for (;;) { - // If we're compiling bugpointed code, don't get in trouble. - if (!isa<Instruction>(Arg) && !isa<Argument>(Arg)) - break; - // Look through the uses of the pointer. - for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); - UI != UE; ) { - Use &U = UI.getUse(); - unsigned OperandNo = UI.getOperandNo(); - ++UI; // Increment UI now, because we may unlink its element. - - // If the call's return value dominates a use of the call's argument - // value, rewrite the use to use the return value. We check for - // reachability here because an unreachable call is considered to - // trivially dominate itself, which would lead us to rewriting its - // argument in terms of its return value, which would lead to - // infinite loops in GetObjCArg. 
- if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) { - Changed = true; - Instruction *Replacement = Inst; - Type *UseTy = U.get()->getType(); - if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) { - // For PHI nodes, insert the bitcast in the predecessor block. - unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo); - BasicBlock *BB = PHI->getIncomingBlock(ValNo); - if (Replacement->getType() != UseTy) - Replacement = new BitCastInst(Replacement, UseTy, "", - &BB->back()); - // While we're here, rewrite all edges for this PHI, rather - // than just one use at a time, to minimize the number of - // bitcasts we emit. - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) - if (PHI->getIncomingBlock(i) == BB) { - // Keep the UI iterator valid. - if (&PHI->getOperandUse( - PHINode::getOperandNumForIncomingValue(i)) == - &UI.getUse()) - ++UI; - PHI->setIncomingValue(i, Replacement); - } - } else { - if (Replacement->getType() != UseTy) - Replacement = new BitCastInst(Replacement, UseTy, "", - cast<Instruction>(U.getUser())); - U.set(Replacement); - } - } - } - - // If Arg is a no-op casted pointer, strip one level of casts and iterate. - if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg)) - Arg = BI->getOperand(0); - else if (isa<GEPOperator>(Arg) && - cast<GEPOperator>(Arg)->hasAllZeroIndices()) - Arg = cast<GEPOperator>(Arg)->getPointerOperand(); - else if (isa<GlobalAlias>(Arg) && - !cast<GlobalAlias>(Arg)->mayBeOverridden()) - Arg = cast<GlobalAlias>(Arg)->getAliasee(); - else - break; - } - } - - // If this function has no escaping allocas or suspicious vararg usage, - // objc_storeStrong calls can be marked with the "tail" keyword. - if (TailOkForStoreStrongs) - for (SmallPtrSet<CallInst *, 8>::iterator I = StoreStrongCalls.begin(), - E = StoreStrongCalls.end(); I != E; ++I) - (*I)->setTailCall(); - StoreStrongCalls.clear(); - - return Changed; -} diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 569439aaf4..0da3746950 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -28,12 +28,12 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Assembly/Writer.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/IRBuilder.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index 5524e01230..07f540a301 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -19,11 +19,11 @@ #define DEBUG_TYPE "reg2mem" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" -#include "llvm/BasicBlock.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" #include "llvm/Transforms/Utils/Local.h" diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 28aaddc50e..e30a2746b0 
100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -26,11 +26,11 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Constants.h" -#include "llvm/DataLayout.h" -#include "llvm/DerivedTypes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" #include "llvm/InstVisitor.h" -#include "llvm/Instructions.h" #include "llvm/Pass.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" @@ -271,13 +271,6 @@ public: return I->second; } - /*LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const { - DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I = - StructValueState.find(std::make_pair(V, i)); - assert(I != StructValueState.end() && "V is not in valuemap!"); - return I->second; - }*/ - /// getTrackedRetVals - Get the inferred return value map. /// const DenseMap<Function*, LatticeVal> &getTrackedRetVals() { @@ -710,9 +703,6 @@ void SCCPSolver::visitPHINode(PHINode &PN) { markConstant(&PN, OperandVal); // Acquire operand value } - - - void SCCPSolver::visitReturnInst(ReturnInst &I) { if (I.getNumOperands() == 0) return; // ret void @@ -1185,7 +1175,7 @@ void SCCPSolver::Solve() { DEBUG(dbgs() << "\nPopped off OI-WL: " << *I << '\n'); // "I" got into the work list because it either made the transition from - // bottom to constant + // bottom to constant, or to overdefined. // // Anything on this worklist that is overdefined need not be visited // since all of its users will have already been marked as overdefined diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 1c220ca0f6..810a553c74 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -33,24 +33,22 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/PtrUseVisitor.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Constants.h" #include "llvm/DIBuilder.h" -#include "llvm/DataLayout.h" #include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/IRBuilder.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Operator.h" #include "llvm/InstVisitor.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" @@ -411,9 +409,9 @@ static Value *foldSelectInst(SelectInst &SI) { // early on. if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition())) return SI.getOperand(1+CI->isZero()); - if (SI.getOperand(1) == SI.getOperand(2)) { + if (SI.getOperand(1) == SI.getOperand(2)) return SI.getOperand(1); - } + return 0; } @@ -621,7 +619,7 @@ private: } // Disable SRoA for any intrinsics except for lifetime invariants. - // FIXME: What about debug instrinsics? This matches old behavior, but + // FIXME: What about debug intrinsics? This matches old behavior, but // doesn't make sense. 
void visitIntrinsicInst(IntrinsicInst &II) { if (!IsOffsetKnown) @@ -1099,13 +1097,12 @@ Type *AllocaPartitioning::getCommonType(iterator I) const { continue; Type *UserTy = 0; - if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser())) { + if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser())) UserTy = LI->getType(); - } else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) { + else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) UserTy = SI->getValueOperand()->getType(); - } else { + else return 0; // Bail if we have weird uses. - } if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) { // If the type is larger than the partition, skip it. We only encounter @@ -1141,8 +1138,7 @@ void AllocaPartitioning::print(raw_ostream &OS, const_iterator I, void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I, StringRef Indent) const { - for (const_use_iterator UI = use_begin(I), UE = use_end(I); - UI != UE; ++UI) { + for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) { if (!UI->U) continue; // Skip dead uses. OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") " @@ -1170,8 +1166,7 @@ void AllocaPartitioning::print(raw_ostream &OS) const { } OS << "Partitioning of alloca: " << AI << "\n"; - unsigned Num = 0; - for (const_iterator I = begin(), E = end(); I != E; ++I, ++Num) { + for (const_iterator I = begin(), E = end(); I != E; ++I) { print(OS, I); printUsers(OS, I); } @@ -1242,7 +1237,7 @@ public: for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(), E = DVIs.end(); I != E; ++I) { DbgValueInst *DVI = *I; - Value *Arg = NULL; + Value *Arg = 0; if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. @@ -1277,7 +1272,7 @@ namespace { /// 1) It takes allocations of aggregates and analyzes the ways in which they /// are used to try to split them into smaller allocations, ideally of /// a single scalar data type. It will split up memcpy and memset accesses -/// as necessary and try to isolate invidual scalar accesses. +/// as necessary and try to isolate individual scalar accesses. /// 2) It will transform accesses into forms which are suitable for SSA value /// promotion. This can be replacing a memset with a scalar store of an /// integer value, or it can involve speculating operations on a PHI or @@ -1439,8 +1434,7 @@ private: // We can only transform this if it is safe to push the loads into the // predecessor blocks. The only thing to watch out for is that we can't put // a possibly trapping load in the predecessor if it is a critical edge. - for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; - ++Idx) { + for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) { TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator(); Value *InVal = PN.getIncomingValue(Idx); @@ -1483,7 +1477,7 @@ private: PN.getName() + ".sroa.speculated"); // Get the TBAA tag and alignment to use from one of the loads. It doesn't - // matter which one we get and if any differ, it doesn't matter. + // matter which one we get and if any differ. 
LoadInst *SomeLoad = cast<LoadInst>(Loads.back()); MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa); unsigned Align = SomeLoad->getAlignment(); @@ -1576,13 +1570,13 @@ private: void visitSelectInst(SelectInst &SI) { DEBUG(dbgs() << " original: " << SI << "\n"); - IRBuilder<> IRB(&SI); // If the select isn't safe to speculate, just use simple logic to emit it. SmallVector<LoadInst *, 4> Loads; if (!isSafeSelectToSpeculate(SI, Loads)) return; + IRBuilder<> IRB(&SI); Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) }; AllocaPartitioning::iterator PIs[2]; AllocaPartitioning::PartitionUse PUs[2]; @@ -1642,44 +1636,6 @@ private: }; } -/// \brief Accumulate the constant offsets in a GEP into a single APInt offset. -/// -/// If the provided GEP is all-constant, the total byte offset formed by the -/// GEP is computed and Offset is set to it. If the GEP has any non-constant -/// operands, the function returns false and the value of Offset is unmodified. -static bool accumulateGEPOffsets(const DataLayout &TD, GEPOperator &GEP, - APInt &Offset) { - APInt GEPOffset(Offset.getBitWidth(), 0); - for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); - GTI != GTE; ++GTI) { - ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand()); - if (!OpC) - return false; - if (OpC->isZero()) continue; - - // Handle a struct index, which adds its field offset to the pointer. - if (StructType *STy = dyn_cast<StructType>(*GTI)) { - unsigned ElementIdx = OpC->getZExtValue(); - const StructLayout *SL = TD.getStructLayout(STy); - GEPOffset += APInt(Offset.getBitWidth(), - SL->getElementOffset(ElementIdx)); - continue; - } - - APInt TypeSize(Offset.getBitWidth(), - TD.getTypeAllocSize(GTI.getIndexedType())); - if (VectorType *VTy = dyn_cast<VectorType>(*GTI)) { - assert((VTy->getScalarSizeInBits() % 8) == 0 && - "vector element size is not a multiple of 8, cannot GEP over it"); - TypeSize = VTy->getScalarSizeInBits() / 8; - } - - GEPOffset += OpC->getValue().sextOrTrunc(Offset.getBitWidth()) * TypeSize; - } - Offset = GEPOffset; - return true; -} - /// \brief Build a GEP out of a base pointer and indices. /// /// This will return the BasePtr if that is valid, or build a new GEP @@ -1762,7 +1718,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, // extremely poorly defined currently. The long-term goal is to remove GEPing // over a vector from the IR completely. if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) { - unsigned ElementSizeInBits = VecTy->getScalarSizeInBits(); + unsigned ElementSizeInBits = TD.getTypeSizeInBits(VecTy->getScalarType()); if (ElementSizeInBits % 8) return 0; // GEPs over non-multiple of 8 size vector elements are invalid. APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8); @@ -1854,7 +1810,7 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD, /// The strategy for finding the more natural GEPs is to peel off layers of the /// pointer, walking back through bit casts and GEPs, searching for a base /// pointer from which we can compute a natural GEP with the desired -/// properities. The algorithm tries to fold as many constant indices into +/// properties. The algorithm tries to fold as many constant indices into /// a single GEP as possible, thus making each GEP more independent of the /// surrounding code. 
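The helper removed above folded a GEP's all-constant indices into one byte offset by adding each struct field's offset from the StructLayout and each array index scaled by the indexed type's alloc size; callers now use the equivalent GEPOperator::accumulateConstantOffset (see the hunk below). A standalone sketch of the same accumulation over a hypothetical layout — the struct, offsets, and sizes are invented for illustration, not taken from the patch:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Hypothetical layout for: struct S { i32 a; [4 x i64] b; }
      // (a at byte 0, b at byte 8, alloc size of S assumed to be 40).
      const uint64_t FieldOffsetB = 8;
      const uint64_t I64AllocSize = 8;
      const uint64_t StructAllocSize = 40;

      // Byte offset of: getelementptr %S* %p, i64 0, i32 1, i64 2
      uint64_t Offset = 0;
      Offset += 0 * StructAllocSize; // leading index scales by sizeof(S)
      Offset += FieldOffsetB;        // struct index adds the field's offset
      Offset += 2 * I64AllocSize;    // array index scales by the element size
      std::printf("constant GEP offset = %llu bytes\n",
                  (unsigned long long)Offset); // prints 24
      return 0;
    }
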
static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, @@ -1882,7 +1838,7 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, // First fold any existing GEPs into the offset. while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) { APInt GEPOffset(Offset.getBitWidth(), 0); - if (!accumulateGEPOffsets(TD, *GEP, GEPOffset)) + if (!GEP->accumulateConstantOffset(TD, GEPOffset)) break; Offset += GEPOffset; Ptr = GEP->getPointerOperand(); @@ -2009,15 +1965,14 @@ static bool isVectorPromotionViable(const DataLayout &TD, if (!Ty) return false; - uint64_t VecSize = TD.getTypeSizeInBits(Ty); - uint64_t ElementSize = Ty->getScalarSizeInBits(); + uint64_t ElementSize = TD.getTypeSizeInBits(Ty->getScalarType()); // While the definition of LLVM vectors is bitpacked, we don't support sizes // that aren't byte sized. if (ElementSize % 8) return false; - assert((VecSize % 8) == 0 && "vector size not a multiple of element size?"); - VecSize /= 8; + assert((TD.getTypeSizeInBits(Ty) % 8) == 0 && + "vector size not a multiple of element size?"); ElementSize /= 8; for (; I != E; ++I) { @@ -2101,9 +2056,9 @@ static bool isIntegerWideningViable(const DataLayout &TD, uint64_t Size = TD.getTypeStoreSize(AllocaTy); - // Check the uses to ensure the uses are (likely) promoteable integer uses. + // Check the uses to ensure the uses are (likely) promotable integer uses. // Also ensure that the alloca has a covering load or store. We don't want - // to widen the integer operotains only to fail to promote due to some other + // to widen the integer operations only to fail to promote due to some other // unsplittable entry (which we may make splittable later). bool WholeAllocaOp = false; for (; I != E; ++I) { @@ -2150,7 +2105,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, !canConvertValue(TD, ValueTy, AllocaTy)) return false; } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) { - if (MI->isVolatile()) + if (MI->isVolatile() || !isa<Constant>(MI->getLength())) return false; if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) { const AllocaPartitioning::MemTransferOffsets &MTO @@ -2223,6 +2178,84 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old, return V; } +static Value *extractVector(IRBuilder<> &IRB, Value *V, + unsigned BeginIndex, unsigned EndIndex, + const Twine &Name) { + VectorType *VecTy = cast<VectorType>(V->getType()); + unsigned NumElements = EndIndex - BeginIndex; + assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); + + if (NumElements == VecTy->getNumElements()) + return V; + + if (NumElements == 1) { + V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex), + Name + ".extract"); + DEBUG(dbgs() << " extract: " << *V << "\n"); + return V; + } + + SmallVector<Constant*, 8> Mask; + Mask.reserve(NumElements); + for (unsigned i = BeginIndex; i != EndIndex; ++i) + Mask.push_back(IRB.getInt32(i)); + V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), + ConstantVector::get(Mask), + Name + ".extract"); + DEBUG(dbgs() << " shuffle: " << *V << "\n"); + return V; +} + +static Value *insertVector(IRBuilder<> &IRB, Value *Old, Value *V, + unsigned BeginIndex, const Twine &Name) { + VectorType *VecTy = cast<VectorType>(Old->getType()); + assert(VecTy && "Can only insert a vector into a vector"); + + VectorType *Ty = dyn_cast<VectorType>(V->getType()); + if (!Ty) { + // Single element to insert. 
+ V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex), + Name + ".insert"); + DEBUG(dbgs() << " insert: " << *V << "\n"); + return V; + } + + assert(Ty->getNumElements() <= VecTy->getNumElements() && + "Too many elements!"); + if (Ty->getNumElements() == VecTy->getNumElements()) { + assert(V->getType() == VecTy && "Vector type mismatch"); + return V; + } + unsigned EndIndex = BeginIndex + Ty->getNumElements(); + + // When inserting a smaller vector into the larger to store, we first + // use a shuffle vector to widen it with undef elements, and then + // a second shuffle vector to select between the loaded vector and the + // incoming vector. + SmallVector<Constant*, 8> Mask; + Mask.reserve(VecTy->getNumElements()); + for (unsigned i = 0; i != VecTy->getNumElements(); ++i) + if (i >= BeginIndex && i < EndIndex) + Mask.push_back(IRB.getInt32(i - BeginIndex)); + else + Mask.push_back(UndefValue::get(IRB.getInt32Ty())); + V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), + ConstantVector::get(Mask), + Name + ".expand"); + DEBUG(dbgs() << " shuffle1: " << *V << "\n"); + + Mask.clear(); + for (unsigned i = 0; i != VecTy->getNumElements(); ++i) + if (i >= BeginIndex && i < EndIndex) + Mask.push_back(IRB.getInt32(i)); + else + Mask.push_back(IRB.getInt32(i + VecTy->getNumElements())); + V = IRB.CreateShuffleVector(V, Old, ConstantVector::get(Mask), + Name + "insert"); + DEBUG(dbgs() << " shuffle2: " << *V << "\n"); + return V; +} + namespace { /// \brief Visitor to rewrite instructions using a partition of an alloca to /// use a new alloca. @@ -2244,7 +2277,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter, // If we are rewriting an alloca partition which can be written as pure // vector operations, we stash extra information here. When VecTy is - // non-null, we have some strict guarantees about the rewriten alloca: + // non-null, we have some strict guarantees about the rewritten alloca: // - The new alloca is exactly the size of the vector type here. // - The accesses all either map to the entire vector or to a single // element. 
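The insertVector helper added above blends a narrow vector into a slice of a wider one with two shufflevectors: the first widens the payload to the full width with undef lanes, the second picks each lane from either the widened payload or the old value. A standalone sketch that only computes the two masks for a hypothetical case (a 2-element payload inserted at index 2 of an 8-element vector; -1 marks an undef lane):

    #include <cstdio>
    #include <vector>

    int main() {
      const unsigned NumElts = 8, BeginIndex = 2, PayloadElts = 2;
      const unsigned EndIndex = BeginIndex + PayloadElts;

      std::vector<int> WidenMask, BlendMask;
      for (unsigned i = 0; i != NumElts; ++i) {
        bool InSlice = i >= BeginIndex && i < EndIndex;
        // First shuffle: payload lanes land at [BeginIndex, EndIndex), rest undef.
        WidenMask.push_back(InSlice ? int(i - BeginIndex) : -1);
        // Second shuffle: in-slice lanes come from the widened payload (operand 0),
        // all other lanes from the old vector (operand 1, lanes NumElts and up).
        BlendMask.push_back(InSlice ? int(i) : int(i + NumElts));
      }

      for (int M : WidenMask) std::printf("%d ", M); // -1 -1 0 1 -1 -1 -1 -1
      std::printf("\n");
      for (int M : BlendMask) std::printf("%d ", M); // 8 9 2 3 12 13 14 15
      std::printf("\n");
      return 0;
    }
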
@@ -2292,9 +2325,9 @@ public: ++NumVectorized; VecTy = cast<VectorType>(NewAI.getAllocatedType()); ElementTy = VecTy->getElementType(); - assert((VecTy->getScalarSizeInBits() % 8) == 0 && + assert((TD.getTypeSizeInBits(VecTy->getScalarType()) % 8) == 0 && "Only multiple-of-8 sized vector elements are viable"); - ElementSize = VecTy->getScalarSizeInBits() / 8; + ElementSize = TD.getTypeSizeInBits(VecTy->getScalarType()) / 8; } else if (isIntegerWideningViable(TD, NewAI.getAllocatedType(), NewAllocaBeginOffset, P, I, E)) { IntTy = Type::getIntNTy(NewAI.getContext(), @@ -2388,29 +2421,14 @@ private: Pass.DeadInsts.insert(I); } - Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) { - Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); + Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB) { unsigned BeginIndex = getIndex(BeginOffset); unsigned EndIndex = getIndex(EndOffset); assert(EndIndex > BeginIndex && "Empty vector!"); - unsigned NumElements = EndIndex - BeginIndex; - assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); - if (NumElements == 1) { - V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex), - getName(".extract")); - DEBUG(dbgs() << " extract: " << *V << "\n"); - } else if (NumElements < VecTy->getNumElements()) { - SmallVector<Constant*, 8> Mask; - Mask.reserve(NumElements); - for (unsigned i = BeginIndex; i != EndIndex; ++i) - Mask.push_back(IRB.getInt32(i)); - V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), - ConstantVector::get(Mask), - getName(".extract")); - DEBUG(dbgs() << " shuffle: " << *V << "\n"); - } - return V; + + Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); + return extractVector(IRB, V, BeginIndex, EndIndex, getName(".vec")); } Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) { @@ -2431,7 +2449,6 @@ private: DEBUG(dbgs() << " original: " << LI << "\n"); Value *OldOp = LI.getOperand(0); assert(OldOp == OldPtr); - IRBuilder<> IRB(&LI); uint64_t Size = EndOffset - BeginOffset; bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType()); @@ -2452,12 +2469,13 @@ private: return true; } + IRBuilder<> IRB(&LI); Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8) : LI.getType(); bool IsPtrAdjusted = false; Value *V; if (VecTy) { - V = rewriteVectorizedLoadInst(IRB, LI, OldOp); + V = rewriteVectorizedLoadInst(IRB); } else if (IntTy && LI.getType()->isIntegerTy()) { V = rewriteIntegerLoad(IRB, LI); } else if (BeginOffset == NewAllocaBeginOffset && @@ -2518,44 +2536,12 @@ private: : VectorType::get(ElementTy, NumElements); if (V->getType() != PartitionTy) V = convertValue(TD, IRB, V, PartitionTy); - if (NumElements < VecTy->getNumElements()) { - // We need to mix in the existing elements. - LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); - if (NumElements == 1) { - V = IRB.CreateInsertElement(LI, V, IRB.getInt32(BeginIndex), - getName(".insert")); - DEBUG(dbgs() << " insert: " << *V << "\n"); - } else { - // When inserting a smaller vector into the larger to store, we first - // use a shuffle vector to widen it with undef elements, and then - // a second shuffle vector to select between the loaded vector and the - // incoming vector. 
- SmallVector<Constant*, 8> Mask; - Mask.reserve(VecTy->getNumElements()); - for (unsigned i = 0; i != VecTy->getNumElements(); ++i) - if (i >= BeginIndex && i < EndIndex) - Mask.push_back(IRB.getInt32(i - BeginIndex)); - else - Mask.push_back(UndefValue::get(IRB.getInt32Ty())); - V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), - ConstantVector::get(Mask), - getName(".expand")); - DEBUG(dbgs() << " shuffle1: " << *V << "\n"); - - Mask.clear(); - for (unsigned i = 0; i != VecTy->getNumElements(); ++i) - if (i >= BeginIndex && i < EndIndex) - Mask.push_back(IRB.getInt32(i)); - else - Mask.push_back(IRB.getInt32(i + VecTy->getNumElements())); - V = IRB.CreateShuffleVector(V, LI, ConstantVector::get(Mask), - getName("insert")); - DEBUG(dbgs() << " shuffle2: " << *V << "\n"); - } - } else { - V = convertValue(TD, IRB, V, VecTy); - } + + // Mix in the existing elements. + Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); + V = insertVector(IRB, Old, V, BeginIndex, getName(".vec")); + StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); Pass.DeadInsts.insert(&SI); @@ -2607,7 +2593,7 @@ private: TD.getTypeStoreSizeInBits(V->getType()) && "Non-byte-multiple bit width"); assert(V->getType()->getIntegerBitWidth() == - TD.getTypeSizeInBits(OldAI.getAllocatedType()) && + TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) && "Only alloca-wide stores can be split and recomposed"); IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8); V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset, @@ -2639,6 +2625,40 @@ private: return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile(); } + /// \brief Compute an integer value from splatting an i8 across the given + /// number of bytes. + /// + /// Note that this routine assumes an i8 is a byte. If that isn't true, don't + /// call this routine. + /// FIXME: Heed the advice above. + /// + /// \param V The i8 value to splat. + /// \param Size The number of bytes in the output (assuming i8 is one byte) + Value *getIntegerSplat(IRBuilder<> &IRB, Value *V, unsigned Size) { + assert(Size > 0 && "Expected a positive number of bytes."); + IntegerType *VTy = cast<IntegerType>(V->getType()); + assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte"); + if (Size == 1) + return V; + + Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8); + V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")), + ConstantExpr::getUDiv( + Constant::getAllOnesValue(SplatIntTy), + ConstantExpr::getZExt( + Constant::getAllOnesValue(V->getType()), + SplatIntTy)), + getName(".isplat")); + return V; + } + + /// \brief Compute a vector splat for a given element value. 
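getIntegerSplat above widens the memset byte by multiplying its zero extension with AllOnes(SplatIntTy) / zext(AllOnes(i8)), i.e. the 0x0101...01 pattern, which replicates the byte into every byte of the wider integer. A standalone sketch of that arithmetic with hypothetical values (a 4-byte splat of 0xAB):

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint32_t Byte = 0xAB;               // the i8 value, zero-extended
      const uint32_t Ones = UINT32_MAX / 0xFFu; // 0xFFFFFFFF / 0xFF == 0x01010101
      const uint32_t Splat = Byte * Ones;       // replicate the byte

      std::printf("0x%02X splat over 4 bytes = 0x%08X\n", Byte, Splat); // 0xABABABAB
      return 0;
    }
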
+ Value *getVectorSplat(IRBuilder<> &IRB, Value *V, unsigned NumElements) { + V = IRB.CreateVectorSplat(NumElements, V, NamePrefix); + DEBUG(dbgs() << " splat: " << *V << "\n"); + return V; + } + bool visitMemSetInst(MemSetInst &II) { DEBUG(dbgs() << " original: " << II << "\n"); IRBuilder<> IRB(&II); @@ -2667,7 +2687,8 @@ private: (BeginOffset != NewAllocaBeginOffset || EndOffset != NewAllocaEndOffset || !AllocaTy->isSingleValueType() || - !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) { + !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)) || + TD.getTypeSizeInBits(ScalarTy)%8 != 0)) { Type *SizeTy = II.getLength()->getType(); Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset); CallInst *New @@ -2683,53 +2704,62 @@ private: // If we can represent this as a simple value, we have to build the actual // value to store, which requires expanding the byte present in memset to // a sensible representation for the alloca type. This is essentially - // splatting the byte to a sufficiently wide integer, bitcasting to the - // desired scalar type, and splatting it across any desired vector type. - uint64_t Size = EndOffset - BeginOffset; - Value *V = II.getValue(); - IntegerType *VTy = cast<IntegerType>(V->getType()); - Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8); - if (Size*8 > VTy->getBitWidth()) - V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")), - ConstantExpr::getUDiv( - Constant::getAllOnesValue(SplatIntTy), - ConstantExpr::getZExt( - Constant::getAllOnesValue(V->getType()), - SplatIntTy)), - getName(".isplat")); - - // If this is an element-wide memset of a vectorizable alloca, insert it. - if (VecTy && (BeginOffset > NewAllocaBeginOffset || - EndOffset < NewAllocaEndOffset)) { - if (V->getType() != ScalarTy) - V = convertValue(TD, IRB, V, ScalarTy); - StoreInst *Store = IRB.CreateAlignedStore( - IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI, - NewAI.getAlignment(), - getName(".load")), - V, IRB.getInt32(getIndex(BeginOffset)), - getName(".insert")), - &NewAI, NewAI.getAlignment()); - (void)Store; - DEBUG(dbgs() << " to: " << *Store << "\n"); - return true; - } + // splatting the byte to a sufficiently wide integer, splatting it across + // any desired vector width, and bitcasting to the final type. + Value *V; + + if (VecTy) { + // If this is a memset of a vectorized alloca, insert it. + assert(ElementTy == ScalarTy); + + unsigned BeginIndex = getIndex(BeginOffset); + unsigned EndIndex = getIndex(EndOffset); + assert(EndIndex > BeginIndex && "Empty vector!"); + unsigned NumElements = EndIndex - BeginIndex; + assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); + + Value *Splat = getIntegerSplat(IRB, II.getValue(), + TD.getTypeSizeInBits(ElementTy)/8); + Splat = convertValue(TD, IRB, Splat, ElementTy); + if (NumElements > 1) + Splat = getVectorSplat(IRB, Splat, NumElements); - // If this is a memset on an alloca where we can widen stores, insert the - // set integer. 
- if (IntTy && (BeginOffset > NewAllocaBeginOffset || - EndOffset < NewAllocaEndOffset)) { - assert(!II.isVolatile()); Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".oldload")); - Old = convertValue(TD, IRB, Old, IntTy); - assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); - uint64_t Offset = BeginOffset - NewAllocaBeginOffset; - V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert")); - } + V = insertVector(IRB, Old, Splat, BeginIndex, getName(".vec")); + } else if (IntTy) { + // If this is a memset on an alloca where we can widen stores, insert the + // set integer. + assert(!II.isVolatile()); + + uint64_t Size = EndOffset - BeginOffset; + V = getIntegerSplat(IRB, II.getValue(), Size); + + if (IntTy && (BeginOffset != NewAllocaBeginOffset || + EndOffset != NewAllocaBeginOffset)) { + Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".oldload")); + Old = convertValue(TD, IRB, Old, IntTy); + assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); + uint64_t Offset = BeginOffset - NewAllocaBeginOffset; + V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert")); + } else { + assert(V->getType() == IntTy && + "Wrong type for an alloca wide integer!"); + } + V = convertValue(TD, IRB, V, AllocaTy); + } else { + // Established these invariants above. + assert(BeginOffset == NewAllocaBeginOffset); + assert(EndOffset == NewAllocaEndOffset); + + V = getIntegerSplat(IRB, II.getValue(), + TD.getTypeSizeInBits(ScalarTy)/8); + if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy)) + V = getVectorSplat(IRB, V, AllocaVecTy->getNumElements()); - if (V->getType() != AllocaTy) V = convertValue(TD, IRB, V, AllocaTy); + } Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), II.isVolatile()); @@ -2814,37 +2844,22 @@ private: // Record this instruction for deletion. Pass.DeadInsts.insert(&II); - bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset && - EndOffset == NewAllocaEndOffset; - bool IsVectorElement = VecTy && !IsWholeAlloca; - uint64_t Size = EndOffset - BeginOffset; - IntegerType *SubIntTy - = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0; - - Type *OtherPtrTy = IsDest ? II.getRawSource()->getType() - : II.getRawDest()->getType(); - if (!EmitMemCpy) { - if (IsVectorElement) - OtherPtrTy = VecTy->getElementType()->getPointerTo(); - else if (IntTy && !IsWholeAlloca) - OtherPtrTy = SubIntTy->getPointerTo(); - else - OtherPtrTy = NewAI.getType(); - } - - // Compute the other pointer, folding as much as possible to produce - // a single, simple GEP in most cases. - Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest(); - OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, - getName("." + OtherPtr->getName())); - // Strip all inbounds GEPs and pointer casts to try to dig out any root // alloca that should be re-examined after rewriting this instruction. + Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest(); if (AllocaInst *AI = dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) Pass.Worklist.insert(AI); if (EmitMemCpy) { + Type *OtherPtrTy = IsDest ? II.getRawSource()->getType() + : II.getRawDest()->getType(); + + // Compute the other pointer, folding as much as possible to produce + // a single, simple GEP in most cases. + OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, + getName("." + OtherPtr->getName())); + Value *OurPtr = getAdjustedAllocaPtr(IRB, IsDest ? 
II.getRawDest()->getType() : II.getRawSource()->getType()); @@ -2865,18 +2880,38 @@ private: if (!Align) Align = 1; - Value *SrcPtr = OtherPtr; + bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset && + EndOffset == NewAllocaEndOffset; + uint64_t Size = EndOffset - BeginOffset; + unsigned BeginIndex = VecTy ? getIndex(BeginOffset) : 0; + unsigned EndIndex = VecTy ? getIndex(EndOffset) : 0; + unsigned NumElements = EndIndex - BeginIndex; + IntegerType *SubIntTy + = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0; + + Type *OtherPtrTy = NewAI.getType(); + if (VecTy && !IsWholeAlloca) { + if (NumElements == 1) + OtherPtrTy = VecTy->getElementType(); + else + OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements); + + OtherPtrTy = OtherPtrTy->getPointerTo(); + } else if (IntTy && !IsWholeAlloca) { + OtherPtrTy = SubIntTy->getPointerTo(); + } + + Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, + getName("." + OtherPtr->getName())); Value *DstPtr = &NewAI; if (!IsDest) std::swap(SrcPtr, DstPtr); Value *Src; - if (IsVectorElement && !IsDest) { - // We have to extract rather than load. - Src = IRB.CreateExtractElement( - IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")), - IRB.getInt32(getIndex(BeginOffset)), - getName(".copyextract")); + if (VecTy && !IsWholeAlloca && !IsDest) { + Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); + Src = extractVector(IRB, Src, BeginIndex, EndIndex, getName(".vec")); } else if (IntTy && !IsWholeAlloca && !IsDest) { Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")); @@ -2889,7 +2924,11 @@ private: getName(".copyload")); } - if (IntTy && !IsWholeAlloca && IsDest) { + if (VecTy && !IsWholeAlloca && IsDest) { + Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".oldload")); + Src = insertVector(IRB, Old, Src, BeginIndex, getName(".vec")); + } else if (IntTy && !IsWholeAlloca && IsDest) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".oldload")); Old = convertValue(TD, IRB, Old, IntTy); @@ -2899,14 +2938,6 @@ private: Src = convertValue(TD, IRB, Src, NewAllocaTy); } - if (IsVectorElement && IsDest) { - // We have to insert into a loaded copy before storing. - Src = IRB.CreateInsertElement( - IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")), - Src, IRB.getInt32(getIndex(BeginOffset)), - getName(".insert")); - } - StoreInst *Store = cast<StoreInst>( IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile())); (void)Store; @@ -2934,6 +2965,7 @@ private: else New = IRB.CreateLifetimeEnd(Ptr, Size); + (void)New; DEBUG(dbgs() << " to: " << *New << "\n"); return true; } @@ -3110,9 +3142,8 @@ private: void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) { assert(Ty->isSingleValueType()); // Load the single value and insert it using the indices. - Value *Load = IRB.CreateLoad(IRB.CreateInBoundsGEP(Ptr, GEPIndices, - Name + ".gep"), - Name + ".load"); + Value *GEP = IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep"); + Value *Load = IRB.CreateLoad(GEP, Name + ".load"); Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); DEBUG(dbgs() << " to: " << *Load << "\n"); } @@ -3385,7 +3416,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI, // Check for the case where we're going to rewrite to a new alloca of the // exact same type as the original, and with the same access offsets. 
In that // case, re-use the existing alloca, but still run through the rewriter to - // performe phi and select speculation. + // perform phi and select speculation. AllocaInst *NewAI; if (AllocaTy == AI.getAllocatedType()) { assert(PI->BeginOffset == 0 && @@ -3552,7 +3583,7 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) { /// If there is a domtree available, we attempt to promote using the full power /// of mem2reg. Otherwise, we build and use the AllocaPromoter above which is /// based on the SSAUpdater utilities. This function returns whether any -/// promotion occured. +/// promotion occurred. bool SROA::promoteAllocas(Function &F) { if (PromotableAllocas.empty()) return false; diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 762bb15c59..8a9c7da113 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -18,7 +18,7 @@ #include "llvm-c/Transforms/Scalar.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" #include "llvm/InitializePasses.h" #include "llvm/PassManager.h" @@ -50,11 +50,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLowerAtomicPass(Registry); initializeLowerExpectIntrinsicPass(Registry); initializeMemCpyOptPass(Registry); - initializeObjCARCAliasAnalysisPass(Registry); - initializeObjCARCAPElimPass(Registry); - initializeObjCARCExpandPass(Registry); - initializeObjCARCContractPass(Registry); - initializeObjCARCOptPass(Registry); initializeReassociatePass(Registry); initializeRegToMemPass(Registry); initializeSCCPPass(Registry); diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index c8656fbd8e..e590a374ea 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -27,19 +27,19 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Constants.h" #include "llvm/DIBuilder.h" -#include "llvm/DataLayout.h" #include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/IRBuilder.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/Operator.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/Pass.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 9160f04fe2..c243d34fd7 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -26,15 +26,15 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Attributes.h" -#include "llvm/Constants.h" -#include "llvm/DataLayout.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Module.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constants.h" 
+#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" -#include "llvm/TargetTransformInfo.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -48,12 +48,19 @@ namespace { } virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetTransformInfo>(); + } }; } char CFGSimplifyPass::ID = 0; -INITIALIZE_PASS(CFGSimplifyPass, "simplifycfg", - "Simplify the CFG", false, false) +INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", + false, false) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) +INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", + false, false) // Public interface to the CFGSimplification pass FunctionPass *llvm::createCFGSimplificationPass() { @@ -111,13 +118,11 @@ static bool markAliveBlocks(BasicBlock *BB, SmallVector<BasicBlock*, 128> Worklist; Worklist.push_back(BB); + Reachable.insert(BB); bool Changed = false; do { BB = Worklist.pop_back_val(); - if (!Reachable.insert(BB)) - continue; - // Do a quick scan of the basic block, turning any obviously unreachable // instructions into LLVM unreachable insts. The instruction combining pass // canonicalizes unreachable insts into stores to null or undef. @@ -176,7 +181,8 @@ static bool markAliveBlocks(BasicBlock *BB, Changed |= ConstantFoldTerminator(BB, true); for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) - Worklist.push_back(*SI); + if (Reachable.insert(*SI)) + Worklist.push_back(*SI); } while (!Worklist.empty()); return Changed; } @@ -294,8 +300,8 @@ static bool mergeEmptyReturnBlocks(Function &F) { /// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function, /// iterating until no more changes are made. -static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD, - const TargetTransformInfo *TTI) { +static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, + const DataLayout *TD) { bool Changed = false; bool LocalChange = true; while (LocalChange) { @@ -304,7 +310,7 @@ static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD, // Loop over all of the basic blocks and remove them if they are unneeded... // for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { - if (SimplifyCFG(BBIt++, TD, TTI)) { + if (SimplifyCFG(BBIt++, TTI, TD)) { LocalChange = true; ++NumSimpl; } @@ -318,12 +324,11 @@ static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD, // simplify the CFG. // bool CFGSimplifyPass::runOnFunction(Function &F) { + const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>(); const DataLayout *TD = getAnalysisIfAvailable<DataLayout>(); - const TargetTransformInfo *TTI = - getAnalysisIfAvailable<TargetTransformInfo>(); bool EverChanged = removeUnreachableBlocksFromFn(F); EverChanged |= mergeEmptyReturnBlocks(F); - EverChanged |= iterativelySimplifyCFG(F, TD, TTI); + EverChanged |= iterativelySimplifyCFG(F, TTI, TD); // If neither pass changed anything, we're done. 
if (!EverChanged) return false; @@ -337,7 +342,7 @@ bool CFGSimplifyPass::runOnFunction(Function &F) { return true; do { - EverChanged = iterativelySimplifyCFG(F, TD, TTI); + EverChanged = iterativelySimplifyCFG(F, TTI, TD); EverChanged |= removeUnreachableBlocksFromFn(F); } while (EverChanged); diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index d4643b9d80..916b37d4a8 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -23,10 +23,10 @@ #include "llvm/ADT/StringMap.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Config/config.h" // FIXME: Shouldn't depend on host! -#include "llvm/DataLayout.h" -#include "llvm/IRBuilder.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -165,7 +165,7 @@ bool SimplifyLibCalls::runOnFunction(Function &F) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { // Ignore non-calls. CallInst *CI = dyn_cast<CallInst>(I++); - if (!CI) continue; + if (!CI || CI->hasFnAttr(Attribute::NoBuiltin)) continue; // Ignore indirect calls and calls to non-external functions. Function *Callee = CI->getCalledFunction(); diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index cde9c178ad..d4595bb373 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -20,7 +20,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index e357378524..2002e680d1 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -58,12 +58,13 @@ #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Module.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CallSite.h" @@ -79,11 +80,15 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced"); namespace { struct TailCallElim : public FunctionPass { + const TargetTransformInfo *TTI; + static char ID; // Pass identification, replacement for typeid TailCallElim() : FunctionPass(ID) { initializeTailCallElimPass(*PassRegistry::getPassRegistry()); } + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnFunction(Function &F); private: @@ -109,14 +114,21 @@ namespace { } char TailCallElim::ID = 0; -INITIALIZE_PASS(TailCallElim, "tailcallelim", - "Tail Call Elimination", false, false) +INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim", + "Tail Call Elimination", false, false) 
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) +INITIALIZE_PASS_END(TailCallElim, "tailcallelim", + "Tail Call Elimination", false, false) // Public interface to the TailCallElimination pass FunctionPass *llvm::createTailCallEliminationPass() { return new TailCallElim(); } +void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetTransformInfo>(); +} + /// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by /// callees of this function. We only do very simple analysis right now, this /// could be expanded in the future to use mod/ref information for particular @@ -151,6 +163,7 @@ bool TailCallElim::runOnFunction(Function &F) { // right, so don't even try to convert it... if (F.getFunctionType()->isVarArg()) return false; + TTI = &getAnalysis<TargetTransformInfo>(); BasicBlock *OldEntry = 0; bool TailCallsAreMarkedTail = false; SmallVector<PHINode*, 8> ArgumentPHIs; @@ -391,7 +404,8 @@ TailCallElim::FindTRECandidate(Instruction *TI, if (BB == &F->getEntryBlock() && FirstNonDbg(BB->front()) == CI && FirstNonDbg(llvm::next(BB->begin())) == TI && - callIsSmall(CI)) { + CI->getCalledFunction() && + !TTI->isLoweredToCall(CI->getCalledFunction())) { // A single-block function with just a call and a return. Check that // the arguments match. CallSite::arg_iterator I = CallSite(CI).arg_begin(), |