diff options
| | | |
|---|---|---|
| author | Nadav Rotem <nrotem@apple.com> | 2013-04-14 05:15:53 +0000 |
| committer | Nadav Rotem <nrotem@apple.com> | 2013-04-14 05:15:53 +0000 |
| commit | ab105ae95fc473c19d9f0b019fc7c7a16d17b1a5 (patch) | |
| tree | 27247e73ffdef91c52268c935d65afd7c066c1c0 | |
| parent | 618eda7a60bafff7741a988e27b98bf81d27cb89 (diff) | |
SLPVectorizer: Add support for trees that don't start at binary operators, and add the cost of extracting values from the roots of the tree.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179475 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Transforms/Vectorize/SLPVectorizer.cpp | 15 |
-rw-r--r-- | lib/Transforms/Vectorize/VecUtils.cpp | 10 |
-rw-r--r-- | lib/Transforms/Vectorize/VecUtils.h | 7 |
-rw-r--r-- | test/Transforms/SLPVectorizer/X86/reduction2.ll | 37 |
4 files changed, 62 insertions, 7 deletions
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2f55a007f2..d94b2b2a0e 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -85,14 +85,16 @@ struct SLPVectorizer : public BasicBlockPass { return true; } - bool tryToVectorizePair(BinaryOperator *A, BinaryOperator *B, BoUpSLP &R) { + bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { if (!A || !B) return false; BoUpSLP::ValueList VL; VL.push_back(A); VL.push_back(B); int Cost = R.getTreeCost(VL); - DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost << ".\n"); - if (Cost >= -SLPCostThreshold) return false; + int ExtrCost = R.getScalarizationCost(VL); + DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost << + " Cost of extract:" << ExtrCost << ".\n"); + if ((Cost+ExtrCost) >= -SLPCostThreshold) return false; DEBUG(dbgs()<<"SLP: Vectorizing pair.\n"); R.vectorizeArith(VL); return true; @@ -100,11 +102,12 @@ struct SLPVectorizer : public BasicBlockPass { bool tryToVectorizeCandidate(BinaryOperator *V, BoUpSLP &R) { if (!V) return false; - BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0)); - BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1)); // Try to vectorize V. - if (tryToVectorizePair(A, B, R)) return true; + if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R)) + return true; + BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0)); + BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1)); // Try to skip B. 
if (B && B->hasOneUse()) { BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0)); diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp index 4d075c505d..584f3d9778 100644 --- a/lib/Transforms/Vectorize/VecUtils.cpp +++ b/lib/Transforms/Vectorize/VecUtils.cpp @@ -173,6 +173,16 @@ bool BoUpSLP::vectorizeStores(StoreList &Stores, int costThreshold) { return Changed; } +int BoUpSLP::getScalarizationCost(ValueList &VL) { + Type *ScalarTy = VL[0]->getType(); + + if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) + ScalarTy = SI->getValueOperand()->getType(); + + VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); + return getScalarizationCost(VecTy); +} + int BoUpSLP::getScalarizationCost(Type *Ty) { int Cost = 0; for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i) diff --git a/lib/Transforms/Vectorize/VecUtils.h b/lib/Transforms/Vectorize/VecUtils.h index f865236ff8..edebcb3e27 100644 --- a/lib/Transforms/Vectorize/VecUtils.h +++ b/lib/Transforms/Vectorize/VecUtils.h @@ -61,6 +61,11 @@ struct BoUpSLP { /// A negative number means that this is profitable. int getTreeCost(ValueList &VL); + /// \returns the scalarization cost for this ValueList. Assuming that this + /// subtree gets vectorized, we may need to extract the values from the + /// roots. This method calculates the cost of extracting the values. + int getScalarizationCost(ValueList &VL); + /// \brief Attempts to order and vectorize a sequence of stores. This /// function does a quadratic scan of the given stores. /// \returns true if the basic block was modified. @@ -118,7 +123,7 @@ private: /// by multiple lanes, or by users outside the tree. /// NOTICE: The vectorization methods also use this set. ValueSet MustScalarize; - + // Contains a list of values that are used outside the current tree. This // set must be reset between runs. 
ValueSet MultiUserVals; diff --git a/test/Transforms/SLPVectorizer/X86/reduction2.ll b/test/Transforms/SLPVectorizer/X86/reduction2.ll new file mode 100644 index 0000000000..9b5d5f701d --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/reduction2.ll @@ -0,0 +1,37 @@ +; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128" +target triple = "i386-apple-macosx10.8.0" + +;CHECK: @foo +;CHECK: load <2 x double> +;CHECK: ret +define double @foo(double* nocapture %D) #0 { + br label %1 + +; <label>:1 ; preds = %1, %0 + %i.02 = phi i32 [ 0, %0 ], [ %10, %1 ] + %sum.01 = phi double [ 0.000000e+00, %0 ], [ %9, %1 ] + %2 = shl nsw i32 %i.02, 1 + %3 = getelementptr inbounds double* %D, i32 %2 + %4 = load double* %3, align 4, !tbaa !0 + %A4 = fmul double %4, %4 + %5 = or i32 %2, 1 + %6 = getelementptr inbounds double* %D, i32 %5 + %7 = load double* %6, align 4, !tbaa !0 + %A7 = fmul double %7, %7 + %8 = fadd double %A4, %A7 + %9 = fadd double %sum.01, %8 + %10 = add nsw i32 %i.02, 1 + %exitcond = icmp eq i32 %10, 100 + br i1 %exitcond, label %11, label %1 + +; <label>:11 ; preds = %1 + ret double %9 +} + +attributes #0 = { nounwind readonly ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!0 = metadata !{metadata !"double", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} |