diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Analysis/TargetTransformInfo.cpp | 8 | ||||
-rw-r--r-- | lib/CodeGen/BasicTargetTransformInfo.cpp | 5 | ||||
-rw-r--r-- | lib/Target/ARM/ARMTargetTransformInfo.cpp | 10 | ||||
-rw-r--r-- | lib/Target/X86/X86TargetTransformInfo.cpp | 17 | ||||
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 51 |
5 files changed, 88 insertions, 3 deletions
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 02af2d34c5..3ef74eb2d6 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -92,6 +92,10 @@ unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { return PrevTTI->getNumberOfRegisters(Vector); } +unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { + return PrevTTI->getRegisterBitWidth(Vector); +} + unsigned TargetTransformInfo::getMaximumUnrollFactor() const { return PrevTTI->getMaximumUnrollFactor(); } @@ -220,6 +224,10 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { return 8; } + unsigned getRegisterBitWidth(bool Vector) const { + return 32; + } + unsigned getMaximumUnrollFactor() const { return 1; } diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index 2f3ac9a901..3892cc4dd5 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -84,6 +84,7 @@ public: virtual unsigned getNumberOfRegisters(bool Vector) const; virtual unsigned getMaximumUnrollFactor() const; + virtual unsigned getRegisterBitWidth(bool Vector) const; virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const; @@ -183,6 +184,10 @@ unsigned BasicTTI::getNumberOfRegisters(bool Vector) const { return 1; } +unsigned BasicTTI::getRegisterBitWidth(bool Vector) const { + return 32; +} + unsigned BasicTTI::getMaximumUnrollFactor() const { return 1; } diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 634004acb4..404a6fff11 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -94,6 +94,16 @@ public: return 16; } + unsigned getRegisterBitWidth(bool Vector) const { + if (Vector) { + if (ST->hasNEON()) + return 128; + return 0; + } + + return 32; + } + unsigned getMaximumUnrollFactor() const { // These are out of order CPUs: if (ST->isCortexA15() || ST->isSwift()) diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 6ab08cbd12..675c896d70 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -83,6 +83,7 @@ public: /// @{ virtual unsigned getNumberOfRegisters(bool Vector) const; + virtual unsigned getRegisterBitWidth(bool Vector) const; virtual unsigned getMaximumUnrollFactor() const; virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, @@ -165,11 +166,27 @@ X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const { } unsigned X86TTI::getNumberOfRegisters(bool Vector) const { + if (Vector && !ST->hasSSE1()) + return 0; + if (ST->is64Bit()) return 16; return 8; } +unsigned X86TTI::getRegisterBitWidth(bool Vector) const { + if (Vector) { + if (ST->hasAVX()) return 256; + if (ST->hasSSE1()) return 128; + return 0; + } + + if (ST->is64Bit()) + return 64; + return 32; + +} + unsigned X86TTI::getMaximumUnrollFactor() const { if (ST->isAtom()) return 1; diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index c29f416be7..cde4bb889e 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -113,9 +113,6 @@ static const unsigned MaxLoopSizeThreshold = 32; /// number of pointers. Notice that the check is quadratic! static const unsigned RuntimeMemoryCheckThreshold = 4; -/// This is the highest vector width that we try to generate. -static const unsigned MaxVectorSize = 8; - namespace { // Forward declarations. @@ -523,6 +520,10 @@ public: /// possible. unsigned selectVectorizationFactor(bool OptForSize, unsigned UserVF); + /// \returns The size (in bits) of the widest type in the code that + /// needs to be vectorized. We ignore values that remain scalar such as + /// 64 bit loop indices. + unsigned getWidestType(); /// \return The most profitable unroll factor. /// If UserUF is non-zero then this method finds the best unroll-factor @@ -2621,6 +2622,20 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, unsigned TC = SE->getSmallConstantTripCount(TheLoop, TheLoop->getLoopLatch()); DEBUG(dbgs() << "LV: Found trip count:"<<TC<<"\n"); + unsigned WidestType = getWidestType(); + unsigned WidestRegister = TTI.getRegisterBitWidth(true); + unsigned MaxVectorSize = WidestRegister / WidestType; + DEBUG(dbgs() << "LV: The Widest type: " << WidestType << " bits.\n"); + DEBUG(dbgs() << "LV: The Widest register is:" << WidestRegister << "bits.\n"); + + if (MaxVectorSize == 0) { + DEBUG(dbgs() << "LV: The target has no vector registers.\n"); + return 1; + } + + assert(MaxVectorSize <= 32 && "Did not expect to pack so many elements" + " into one vector."); + unsigned VF = MaxVectorSize; // If we optimize the program for size, avoid creating the tail loop. @@ -2672,6 +2687,36 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, return Width; } +unsigned LoopVectorizationCostModel::getWidestType() { + unsigned MaxWidth = 8; + + // For each block. + for (Loop::block_iterator bb = TheLoop->block_begin(), + be = TheLoop->block_end(); bb != be; ++bb) { + BasicBlock *BB = *bb; + + // For each instruction in the loop. + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + if (Legal->isUniformAfterVectorization(it)) + continue; + + Type *T = it->getType(); + + if (StoreInst *ST = dyn_cast<StoreInst>(it)) + T = ST->getValueOperand()->getType(); + + // PHINodes and pointers are difficult to analyze, but we catch all other + // uses of the types in other instructions. + if (isa<PHINode>(it) || T->isPointerTy() || T->isVoidTy()) + continue; + + MaxWidth = std::max(MaxWidth, T->getScalarSizeInBits()); + } + } + + return MaxWidth; +} + unsigned LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, unsigned UserUF) { |