diff options
Diffstat (limited to 'lib/Transforms/Vectorize/LoopVectorize.h')
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.h | 535 |
1 files changed, 0 insertions, 535 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.h b/lib/Transforms/Vectorize/LoopVectorize.h deleted file mode 100644 index 60426ad844..0000000000 --- a/lib/Transforms/Vectorize/LoopVectorize.h +++ /dev/null @@ -1,535 +0,0 @@ -//===- LoopVectorize.h --- A Loop Vectorizer ------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops -// and generates target-independent LLVM-IR. Legalization of the IR is done -// in the codegen. However, the vectorizes uses (will use) the codegen -// interfaces to generate IR that is likely to result in an optimal binary. -// -// The loop vectorizer combines consecutive loop iteration into a single -// 'wide' iteration. After this transformation the index is incremented -// by the SIMD vector width, and not by one. -// -// This pass has three parts: -// 1. The main loop pass that drives the different parts. -// 2. LoopVectorizationLegality - A unit that checks for the legality -// of the vectorization. -// 3. InnerLoopVectorizer - A unit that performs the actual -// widening of instructions. -// 4. LoopVectorizationCostModel - A unit that checks for the profitability -// of vectorization. It decides on the optimal vector width, which -// can be one, if vectorization is not profitable. -// -//===----------------------------------------------------------------------===// -// -// The reduction-variable vectorization is based on the paper: -// D. Nuzman and R. Henderson. Multi-platform Auto-vectorization. -// -// Variable uniformity checks are inspired by: -// Karrenberg, R. and Hack, S. Whole Function Vectorization. -// -// Other ideas/concepts are from: -// A. Zaks and D. Nuzman. Autovectorization in GCC-two years later. -// -// S. Maleki, Y. Gao, M. Garzaran, T. Wong and D. Padua. An Evaluation of -// Vectorizing Compilers. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_TRANSFORM_VECTORIZE_LOOP_VECTORIZE_H -#define LLVM_TRANSFORM_VECTORIZE_LOOP_VECTORIZE_H - -#define LV_NAME "loop-vectorize" -#define DEBUG_TYPE LV_NAME - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/IR/IRBuilder.h" -#include <algorithm> -#include <map> - -using namespace llvm; - -/// We don't vectorize loops with a known constant trip count below this number. -const unsigned TinyTripCountThreshold = 16; - -/// When performing a runtime memory check, do not check more than this -/// number of pointers. Notice that the check is quadratic! -const unsigned RuntimeMemoryCheckThreshold = 4; - -/// This is the highest vector width that we try to generate. -const unsigned MaxVectorSize = 8; - -/// This is the highest Unroll Factor. -const unsigned MaxUnrollSize = 4; - -namespace llvm { - -// Forward declarations. -class LoopVectorizationLegality; -class LoopVectorizationCostModel; -class TargetTransformInfo; - -/// InnerLoopVectorizer vectorizes loops which contain only one basic -/// block to a specified vectorization factor (VF). -/// This class performs the widening of scalars into vectors, or multiple -/// scalars. This class also implements the following features: -/// * It inserts an epilogue loop for handling loops that don't have iteration -/// counts that are known to be a multiple of the vectorization factor. -/// * It handles the code generation for reduction variables. -/// * Scalarization (implementation using scalars) of un-vectorizable -/// instructions. -/// InnerLoopVectorizer does not perform any vectorization-legality -/// checks, and relies on the caller to check for the different legality -/// aspects. The InnerLoopVectorizer relies on the -/// LoopVectorizationLegality class to provide information about the induction -/// and reduction variables that were found to a given vectorization factor. -class InnerLoopVectorizer { -public: - InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI, - DominatorTree *DT, DataLayout *DL, unsigned VecWidth, - unsigned UnrollFactor) - : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), VF(VecWidth), - UF(UnrollFactor), Builder(SE->getContext()), Induction(0), - OldInduction(0), WidenMap(UnrollFactor) {} - - // Perform the actual loop widening (vectorization). - void vectorize(LoopVectorizationLegality *Legal) { - // Create a new empty loop. Unlink the old loop and connect the new one. - createEmptyLoop(Legal); - // Widen each instruction in the old loop to a new one in the new loop. - // Use the Legality module to find the induction and reduction variables. - vectorizeLoop(Legal); - // Register the new loop and update the analysis passes. - updateAnalysis(); - } - -private: - /// A small list of PHINodes. - typedef SmallVector<PHINode*, 4> PhiVector; - /// When we unroll loops we have multiple vector values for each scalar. - /// This data structure holds the unrolled and vectorized values that - /// originated from one scalar instruction. - typedef SmallVector<Value*, 2> VectorParts; - - /// Add code that checks at runtime if the accessed arrays overlap. - /// Returns the comparator value or NULL if no check is needed. - Value *addRuntimeCheck(LoopVectorizationLegality *Legal, - Instruction *Loc); - /// Create an empty loop, based on the loop ranges of the old loop. - void createEmptyLoop(LoopVectorizationLegality *Legal); - /// Copy and widen the instructions from the old loop. - void vectorizeLoop(LoopVectorizationLegality *Legal); - - /// A helper function that computes the predicate of the block BB, assuming - /// that the header block of the loop is set to True. It returns the *entry* - /// mask for the block BB. - VectorParts createBlockInMask(BasicBlock *BB); - /// A helper function that computes the predicate of the edge between SRC - /// and DST. - VectorParts createEdgeMask(BasicBlock *Src, BasicBlock *Dst); - - /// A helper function to vectorize a single BB within the innermost loop. - void vectorizeBlockInLoop(LoopVectorizationLegality *Legal, BasicBlock *BB, - PhiVector *PV); - - /// Insert the new loop to the loop hierarchy and pass manager - /// and update the analysis passes. - void updateAnalysis(); - - /// This instruction is un-vectorizable. Implement it as a sequence - /// of scalars. - void scalarizeInstruction(Instruction *Instr); - - /// Create a broadcast instruction. This method generates a broadcast - /// instruction (shuffle) for loop invariant values and for the induction - /// value. If this is the induction variable then we extend it to N, N+1, ... - /// this is needed because each iteration in the loop corresponds to a SIMD - /// element. - Value *getBroadcastInstrs(Value *V); - - /// This function adds 0, 1, 2 ... to each vector element, starting at zero. - /// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...). - /// The sequence starts at StartIndex. - Value *getConsecutiveVector(Value* Val, unsigned StartIdx, bool Negate); - - /// When we go over instructions in the basic block we rely on previous - /// values within the current basic block or on loop invariant values. - /// When we widen (vectorize) values we place them in the map. If the values - /// are not within the map, they have to be loop invariant, so we simply - /// broadcast them into a vector. - VectorParts &getVectorValue(Value *V); - - /// Get a uniform vector of constant integers. We use this to get - /// vectors of ones and zeros for the reduction code. - Constant* getUniformVector(unsigned Val, Type* ScalarTy); - - /// Generate a shuffle sequence that will reverse the vector Vec. - Value *reverseVector(Value *Vec); - - /// This is a helper class that holds the vectorizer state. It maps scalar - /// instructions to vector instructions. When the code is 'unrolled' then - /// then a single scalar value is mapped to multiple vector parts. The parts - /// are stored in the VectorPart type. - struct ValueMap { - /// C'tor. UnrollFactor controls the number of vectors ('parts') that - /// are mapped. - ValueMap(unsigned UnrollFactor) : UF(UnrollFactor) {} - - /// \return True if 'Key' is saved in the Value Map. - bool has(Value *Key) { return MapStoreage.count(Key); } - - /// Initializes a new entry in the map. Sets all of the vector parts to the - /// save value in 'Val'. - /// \return A reference to a vector with splat values. - VectorParts &splat(Value *Key, Value *Val) { - MapStoreage[Key].clear(); - MapStoreage[Key].append(UF, Val); - return MapStoreage[Key]; - } - - ///\return A reference to the value that is stored at 'Key'. - VectorParts &get(Value *Key) { - if (!has(Key)) - MapStoreage[Key].resize(UF); - return MapStoreage[Key]; - } - - /// The unroll factor. Each entry in the map stores this number of vector - /// elements. - unsigned UF; - - /// Map storage. We use std::map and not DenseMap because insertions to a - /// dense map invalidates its iterators. - std::map<Value*, VectorParts> MapStoreage; - }; - - /// The original loop. - Loop *OrigLoop; - /// Scev analysis to use. - ScalarEvolution *SE; - /// Loop Info. - LoopInfo *LI; - /// Dominator Tree. - DominatorTree *DT; - /// Data Layout. - DataLayout *DL; - /// The vectorization SIMD factor to use. Each vector will have this many - /// vector elements. - unsigned VF; - /// The vectorization unroll factor to use. Each scalar is vectorized to this - /// many different vector instructions. - unsigned UF; - - /// The builder that we use - IRBuilder<> Builder; - - // --- Vectorization state --- - - /// The vector-loop preheader. - BasicBlock *LoopVectorPreHeader; - /// The scalar-loop preheader. - BasicBlock *LoopScalarPreHeader; - /// Middle Block between the vector and the scalar. - BasicBlock *LoopMiddleBlock; - ///The ExitBlock of the scalar loop. - BasicBlock *LoopExitBlock; - ///The vector loop body. - BasicBlock *LoopVectorBody; - ///The scalar loop body. - BasicBlock *LoopScalarBody; - ///The first bypass block. - BasicBlock *LoopBypassBlock; - - /// The new Induction variable which was added to the new block. - PHINode *Induction; - /// The induction variable of the old basic block. - PHINode *OldInduction; - /// Maps scalars to widened vectors. - ValueMap WidenMap; -}; - -/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and -/// to what vectorization factor. -/// This class does not look at the profitability of vectorization, only the -/// legality. This class has two main kinds of checks: -/// * Memory checks - The code in canVectorizeMemory checks if vectorization -/// will change the order of memory accesses in a way that will change the -/// correctness of the program. -/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory -/// checks for a number of different conditions, such as the availability of a -/// single induction variable, that all types are supported and vectorize-able, -/// etc. This code reflects the capabilities of InnerLoopVectorizer. -/// This class is also used by InnerLoopVectorizer for identifying -/// induction variable and the different reduction variables. -class LoopVectorizationLegality { -public: - LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DataLayout *DL, - DominatorTree *DT) - : TheLoop(L), SE(SE), DL(DL), DT(DT), Induction(0) {} - - /// This enum represents the kinds of reductions that we support. - enum ReductionKind { - NoReduction, ///< Not a reduction. - IntegerAdd, ///< Sum of numbers. - IntegerMult, ///< Product of numbers. - IntegerOr, ///< Bitwise or logical OR of numbers. - IntegerAnd, ///< Bitwise or logical AND of numbers. - IntegerXor ///< Bitwise or logical XOR of numbers. - }; - - /// This enum represents the kinds of inductions that we support. - enum InductionKind { - NoInduction, ///< Not an induction variable. - IntInduction, ///< Integer induction variable. Step = 1. - ReverseIntInduction, ///< Reverse int induction variable. Step = -1. - PtrInduction ///< Pointer induction variable. Step = sizeof(elem). - }; - - /// This POD struct holds information about reduction variables. - struct ReductionDescriptor { - ReductionDescriptor() : StartValue(0), LoopExitInstr(0), Kind(NoReduction) { - } - - ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K) - : StartValue(Start), LoopExitInstr(Exit), Kind(K) {} - - // The starting value of the reduction. - // It does not have to be zero! - Value *StartValue; - // The instruction who's value is used outside the loop. - Instruction *LoopExitInstr; - // The kind of the reduction. - ReductionKind Kind; - }; - - // This POD struct holds information about the memory runtime legality - // check that a group of pointers do not overlap. - struct RuntimePointerCheck { - RuntimePointerCheck() : Need(false) {} - - /// Reset the state of the pointer runtime information. - void reset() { - Need = false; - Pointers.clear(); - Starts.clear(); - Ends.clear(); - } - - /// Insert a pointer and calculate the start and end SCEVs. - void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr); - - /// This flag indicates if we need to add the runtime check. - bool Need; - /// Holds the pointers that we need to check. - SmallVector<Value*, 2> Pointers; - /// Holds the pointer value at the beginning of the loop. - SmallVector<const SCEV*, 2> Starts; - /// Holds the pointer value at the end of the loop. - SmallVector<const SCEV*, 2> Ends; - }; - - /// A POD for saving information about induction variables. - struct InductionInfo { - InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {} - InductionInfo() : StartValue(0), IK(NoInduction) {} - /// Start value. - Value *StartValue; - /// Induction kind. - InductionKind IK; - }; - - /// ReductionList contains the reduction descriptors for all - /// of the reductions that were found in the loop. - typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList; - - /// InductionList saves induction variables and maps them to the - /// induction descriptor. - typedef MapVector<PHINode*, InductionInfo> InductionList; - - /// Returns true if it is legal to vectorize this loop. - /// This does not mean that it is profitable to vectorize this - /// loop, only that it is legal to do so. - bool canVectorize(); - - /// Returns the Induction variable. - PHINode *getInduction() { return Induction; } - - /// Returns the reduction variables found in the loop. - ReductionList *getReductionVars() { return &Reductions; } - - /// Returns the induction variables found in the loop. - InductionList *getInductionVars() { return &Inductions; } - - /// Returns True if V is an induction variable in this loop. - bool isInductionVariable(const Value *V); - - /// Return true if the block BB needs to be predicated in order for the loop - /// to be vectorized. - bool blockNeedsPredication(BasicBlock *BB); - - /// Check if this pointer is consecutive when vectorizing. This happens - /// when the last index of the GEP is the induction variable, or that the - /// pointer itself is an induction variable. - /// This check allows us to vectorize A[idx] into a wide load/store. - /// Returns: - /// 0 - Stride is unknown or non consecutive. - /// 1 - Address is consecutive. - /// -1 - Address is consecutive, and decreasing. - int isConsecutivePtr(Value *Ptr); - - /// Returns true if the value V is uniform within the loop. - bool isUniform(Value *V); - - /// Returns true if this instruction will remain scalar after vectorization. - bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); } - - /// Returns the information that we collected about runtime memory check. - RuntimePointerCheck *getRuntimePointerCheck() { return &PtrRtCheck; } -private: - /// Check if a single basic block loop is vectorizable. - /// At this point we know that this is a loop with a constant trip count - /// and we only need to check individual instructions. - bool canVectorizeInstrs(); - - /// When we vectorize loops we may change the order in which - /// we read and write from memory. This method checks if it is - /// legal to vectorize the code, considering only memory constrains. - /// Returns true if the loop is vectorizable - bool canVectorizeMemory(); - - /// Return true if we can vectorize this loop using the IF-conversion - /// transformation. - bool canVectorizeWithIfConvert(); - - /// Collect the variables that need to stay uniform after vectorization. - void collectLoopUniforms(); - - /// Return true if all of the instructions in the block can be speculatively - /// executed. - bool blockCanBePredicated(BasicBlock *BB); - - /// Returns True, if 'Phi' is the kind of reduction variable for type - /// 'Kind'. If this is a reduction variable, it adds it to ReductionList. - bool AddReductionVar(PHINode *Phi, ReductionKind Kind); - /// Returns true if the instruction I can be a reduction variable of type - /// 'Kind'. - bool isReductionInstr(Instruction *I, ReductionKind Kind); - /// Returns the induction kind of Phi. This function may return NoInduction - /// if the PHI is not an induction variable. - InductionKind isInductionVariable(PHINode *Phi); - /// Return true if can compute the address bounds of Ptr within the loop. - bool hasComputableBounds(Value *Ptr); - - /// The loop that we evaluate. - Loop *TheLoop; - /// Scev analysis. - ScalarEvolution *SE; - /// DataLayout analysis. - DataLayout *DL; - // Dominators. - DominatorTree *DT; - - // --- vectorization state --- // - - /// Holds the integer induction variable. This is the counter of the - /// loop. - PHINode *Induction; - /// Holds the reduction variables. - ReductionList Reductions; - /// Holds all of the induction variables that we found in the loop. - /// Notice that inductions don't need to start at zero and that induction - /// variables can be pointers. - InductionList Inductions; - - /// Allowed outside users. This holds the reduction - /// vars which can be accessed from outside the loop. - SmallPtrSet<Value*, 4> AllowedExit; - /// This set holds the variables which are known to be uniform after - /// vectorization. - SmallPtrSet<Instruction*, 4> Uniforms; - /// We need to check that all of the pointers in this list are disjoint - /// at runtime. - RuntimePointerCheck PtrRtCheck; -}; - -/// LoopVectorizationCostModel - estimates the expected speedups due to -/// vectorization. -/// In many cases vectorization is not profitable. This can happen because of -/// a number of reasons. In this class we mainly attempt to predict the -/// expected speedup/slowdowns due to the supported instruction set. We use the -/// TargetTransformInfo to query the different backends for the cost of -/// different operations. -class LoopVectorizationCostModel { -public: - LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI, - LoopVectorizationLegality *Legal, - const TargetTransformInfo *TTI) - : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI) {} - - /// \return The most profitable vectorization factor. - /// This method checks every power of two up to VF. If UserVF is not ZERO - /// then this vectorization factor will be selected if vectorization is - /// possible. - unsigned selectVectorizationFactor(bool OptForSize, unsigned UserVF); - - - /// \return The most profitable unroll factor. - /// If UserUF is non-zero then this method finds the best unroll-factor - /// based on register pressure and other parameters. - unsigned selectUnrollFactor(bool OptForSize, unsigned UserUF); - - /// \brief A struct that represents some properties of the register usage - /// of a loop. - struct RegisterUsage { - /// Holds the number of loop invariant values that are used in the loop. - unsigned LoopInvariantRegs; - /// Holds the maximum number of concurrent live intervals in the loop. - unsigned MaxLocalUsers; - /// Holds the number of instructions in the loop. - unsigned NumInstructions; - }; - - /// \return information about the register usage of the loop. - RegisterUsage calculateRegisterUsage(); - -private: - /// Returns the expected execution cost. The unit of the cost does - /// not matter because we use the 'cost' units to compare different - /// vector widths. The cost that is returned is *not* normalized by - /// the factor width. - unsigned expectedCost(unsigned VF); - - /// Returns the execution time cost of an instruction for a given vector - /// width. Vector width of one means scalar. - unsigned getInstructionCost(Instruction *I, unsigned VF); - - /// A helper function for converting Scalar types to vector types. - /// If the incoming type is void, we return void. If the VF is 1, we return - /// the scalar type. - static Type* ToVectorTy(Type *Scalar, unsigned VF); - - /// The loop that we evaluate. - Loop *TheLoop; - /// Scev analysis. - ScalarEvolution *SE; - /// Loop Info analysis. - LoopInfo *LI; - /// Vectorization legality. - LoopVectorizationLegality *Legal; - /// Vector target information. - const TargetTransformInfo *TTI; -}; - -}// namespace llvm - -#endif //LLVM_TRANSFORM_VECTORIZE_LOOP_VECTORIZE_H - |