From cfb6285fdb27fba97ddc3ebf6485110d9a157ac5 Mon Sep 17 00:00:00 2001
From: Nadav Rotem <nrotem@apple.com>
Date: Tue, 11 Dec 2012 04:55:10 +0000
Subject: Fix PR14565. Don't if-convert loops that have switch statements in
 them.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169813 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/LoopVectorize.cpp | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp')
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index feeececedb..a69124032b 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1263,6 +1263,10 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
   for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) {
     BasicBlock *BB = LoopBlocks[i];
 
+    // We don't support switch statements inside loops.
+    if (!isa<BranchInst>(BB->getTerminator()))
+      return false;
+
     // We must have at most two predecessors because we need to convert
     // all PHIs to selects.
     unsigned Preds = std::distance(pred_begin(BB), pred_end(BB));
-- 
cgit v1.2.3-18-g5258


From 5e9efa10fc7eea582bb1ad216364fb2c467e4477 Mon Sep 17 00:00:00 2001
From: Nadav Rotem <nrotem@apple.com>
Date: Tue, 11 Dec 2012 18:58:10 +0000
Subject: Loop Vectorize: optimize the vectorization of trunc(induction_var).
 The truncation is now done on scalars.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169904 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/LoopVectorize.cpp | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp')

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index a69124032b..9550e8a04c 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1204,8 +1204,20 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
     case Instruction::Trunc:
     case Instruction::FPTrunc:
     case Instruction::BitCast: {
-      /// Vectorize bitcasts.
       CastInst *CI = dyn_cast<CastInst>(it);
+      /// Optimize the special case where the source is the induction
+      /// variable. Notice that we can only optimize the 'trunc' case
+      /// because: a. FP conversions lose precision, b. sext/zext may wrap,
+      /// c. other casts depend on pointer size.
+      if (CI->getOperand(0) == OldInduction &&
+          it->getOpcode() == Instruction::Trunc) {
+        Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction,
+                                               CI->getType());
+        Value *Broadcasted = getBroadcastInstrs(ScalarCast);
+        WidenMap[it] = getConsecutiveVector(Broadcasted);
+        break;
+      }
+      /// Vectorize casts.
       Value *A = getVectorValue(it->getOperand(0));
       Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF);
       WidenMap[it] = Builder.CreateCast(CI->getOpcode(), A, DestTy);
-- 
cgit v1.2.3-18-g5258


From 655d2c5354fcd44c329d99428c7d9196bc78dbad Mon Sep 17 00:00:00 2001
From: Nadav Rotem <nrotem@apple.com>
Date: Tue, 11 Dec 2012 21:30:14 +0000
Subject: PR14574. Fix a bug in the code that calculates the mask the converted
 PHIs in if-conversion.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169916 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp')

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9550e8a04c..baa655cd59 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -954,7 +954,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
         // At this point we generate the predication tree. There may be
         // duplications since this is a simple recursive scan, but future
         // optimizations will clean it up.
-        Value *Cond = createBlockInMask(P->getIncomingBlock(0));
+        Value *Cond = createEdgeMask(P->getIncomingBlock(0), P->getParent());
         WidenMap[P] =
           Builder.CreateSelect(Cond,
                                getVectorValue(P->getIncomingValue(0)),
-- 
cgit v1.2.3-18-g5258


From 5da804150d418b8b4956b84013f8f67df553c543 Mon Sep 17 00:00:00 2001
From: Nadav Rotem <nrotem@apple.com>
Date: Wed, 12 Dec 2012 01:11:46 +0000
Subject: LoopVectorizer: When -Os is used, vectorize only loops that dont
 require a tail loop. There is no testcase because I dont know of a way to
 initialize the loop vectorizer pass without adding an additional hidden flag.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169950 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/LoopVectorize.cpp | 82 ++++++++++++++++++++++--------
 1 file changed, 60 insertions(+), 22 deletions(-)

(limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp')

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index baa655cd59..d796716cf7 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -51,9 +51,12 @@ namespace {
 
 /// The LoopVectorize Pass.
 struct LoopVectorize : public LoopPass {
-  static char ID; // Pass identification, replacement for typeid
+  /// Pass identification, replacement for typeid
+  static char ID;
+  /// Optimize for size. Do not generate tail loops.
+  bool OptForSize;
 
-  LoopVectorize() : LoopPass(ID) {
+  explicit LoopVectorize(bool OptSz = false) : LoopPass(ID), OptForSize(OptSz) {
     initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
   }
 
@@ -85,23 +88,17 @@ struct LoopVectorize : public LoopPass {
     }
 
     // Select the preffered vectorization factor.
-    unsigned VF = 1;
-    if (VectorizationFactor == 0) {
-      const VectorTargetTransformInfo *VTTI = 0;
-      if (TTI)
-        VTTI = TTI->getVectorTargetTransformInfo();
-      // Use the cost model.
-      LoopVectorizationCostModel CM(L, SE, &LVL, VTTI);
-      VF = CM.findBestVectorizationFactor();
-
-      if (VF == 1) {
-        DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
-        return false;
-      }
-
-    } else {
-      // Use the user command flag.
-      VF = VectorizationFactor;
+    const VectorTargetTransformInfo *VTTI = 0;
+    if (TTI)
+      VTTI = TTI->getVectorTargetTransformInfo();
+    // Use the cost model.
+    LoopVectorizationCostModel CM(L, SE, &LVL, VTTI);
+    unsigned VF = CM.selectVectorizationFactor(OptForSize,
+                                                 VectorizationFactor);
+
+    if (VF == 1) {
+      DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
+      return false;
     }
 
     DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF << ") in "<<
@@ -1886,7 +1883,48 @@ bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) {
 }
 
 unsigned
-LoopVectorizationCostModel::findBestVectorizationFactor(unsigned VF) {
+LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
+                                                        unsigned UserVF) {
+  if (OptForSize && Legal->getRuntimePointerCheck()->Need) {
+    DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n");
+    return 1;
+  }
+
+  // Find the trip count.
+  unsigned TC = SE->getSmallConstantTripCount(TheLoop, TheLoop->getLoopLatch());
+  DEBUG(dbgs() << "LV: Found trip count:"<<TC<<"\n");
+
+  unsigned VF = MaxVectorSize;
+
+  // If we optimize the program for size, avoid creating the tail loop.
+  if (OptForSize) {
+    // If we are unable to calculate the trip count then don't try to vectorize.
+    if (TC < 2) {
+      DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
+      return 1;
+    }
+
+    // Find the maximum SIMD width that can fit within the trip count.
+    VF = TC % MaxVectorSize;
+
+    if (VF == 0)
+      VF = MaxVectorSize;
+
+    // If the trip count that we found modulo the vectorization factor is not
+    // zero then we require a tail.
+    if (VF < 2) {
+      DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
+      return 1;
+    }
+  }
+
+  if (UserVF != 0) {
+    assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
+    DEBUG(dbgs() << "LV: Using user VF "<<UserVF<<".\n");
+
+    return UserVF;
+  }
+
   if (!VTTI) {
     DEBUG(dbgs() << "LV: No vector target information. Not vectorizing. \n");
     return 1;
@@ -2121,8 +2159,8 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
 INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
 
 namespace llvm {
-  Pass *createLoopVectorizePass() {
-    return new LoopVectorize();
+  Pass *createLoopVectorizePass(bool OptForSize = false) {
+    return new LoopVectorize(OptForSize);
   }
 }
 
-- 
cgit v1.2.3-18-g5258


From 6c645a35ccc658b20506d3137890d9003edd3527 Mon Sep 17 00:00:00 2001
From: Nadav Rotem <nrotem@apple.com>
Date: Wed, 12 Dec 2012 01:31:10 +0000
Subject: fix a typo.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169953 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp')

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index d796716cf7..5c5e9af3b1 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1859,7 +1859,7 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) {
     if (it->mayReadFromMemory() || it->mayWriteToMemory() || it->mayThrow())
       return false;
 
-    // The isntructions below can trap.
+    // The instructions below can trap.
     switch (it->getOpcode()) {
     default: continue;
     case Instruction::UDiv:
-- 
cgit v1.2.3-18-g5258


From db65ff39faee4cab994d3b7ece1a31dfd9343818 Mon Sep 17 00:00:00 2001
From: Nadav Rotem <nrotem@apple.com>
Date: Wed, 12 Dec 2012 01:33:47 +0000
Subject: Fix the ascii drawing that was ruined when I split the H and CPP

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169955 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/LoopVectorize.cpp | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp')

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5c5e9af3b1..da073c5e59 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -404,27 +404,27 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
    the vectorized instructions while the old loop will continue to run the
    scalar remainder.
 
-   [ ] <-- vector loop bypass.
-   /  |
-   /   v
+       [ ] <-- vector loop bypass.
+     /  |
+    /   v
    |   [ ]     <-- vector pre header.
    |    |
    |    v
    |   [  ] \
    |   [  ]_|   <-- vector loop.
    |    |
-   \   v
-   >[ ]   <--- middle-block.
-   /  |
-   /   v
+    \   v
+      >[ ]   <--- middle-block.
+     /  |
+    /   v
    |   [ ]     <--- new preheader.
    |    |
    |    v
    |   [ ] \
    |   [ ]_|   <-- old scalar loop to handle remainder.
-   \   |
-   \  v
-   >[ ]     <-- exit block.
+    \   |
+     \  v
+      >[ ]     <-- exit block.
    ...
    */
 
-- 
cgit v1.2.3-18-g5258


From ae3b652f5cc19d83b6466d4fa70a7d1c7fb6d06c Mon Sep 17 00:00:00 2001
From: Nadav Rotem <nrotem@apple.com>
Date: Wed, 12 Dec 2012 19:29:45 +0000
Subject: LoopVectorizer: Use the "optsize" attribute to decide if we are
 allowed to increase the function size.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170004 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/LoopVectorize.cpp | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp')

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index da073c5e59..749b664f53 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -53,10 +53,8 @@ namespace {
 struct LoopVectorize : public LoopPass {
   /// Pass identification, replacement for typeid
   static char ID;
-  /// Optimize for size. Do not generate tail loops.
-  bool OptForSize;
 
-  explicit LoopVectorize(bool OptSz = false) : LoopPass(ID), OptForSize(OptSz) {
+  explicit LoopVectorize() : LoopPass(ID) {
     initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
   }
 
@@ -93,8 +91,15 @@ struct LoopVectorize : public LoopPass {
       VTTI = TTI->getVectorTargetTransformInfo();
     // Use the cost model.
     LoopVectorizationCostModel CM(L, SE, &LVL, VTTI);
+
+    // Check the function attribues to find out if this function should be
+    // optimized for size.
+    Function *F = L->getHeader()->getParent();
+    bool OptForSize =
+    F->getFnAttributes().hasAttribute(Attributes::OptimizeForSize);
+
     unsigned VF = CM.selectVectorizationFactor(OptForSize,
-                                                 VectorizationFactor);
+                                               VectorizationFactor);
 
     if (VF == 1) {
       DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
@@ -2159,8 +2164,8 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
 INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
 
 namespace llvm {
-  Pass *createLoopVectorizePass(bool OptForSize = false) {
-    return new LoopVectorize(OptForSize);
+  Pass *createLoopVectorizePass() {
+    return new LoopVectorize();
   }
 }
 
-- 
cgit v1.2.3-18-g5258


From d0b144c04c10c62ad51d0e171680d9ce95af4078 Mon Sep 17 00:00:00 2001
From: Nadav Rotem <nrotem@apple.com>
Date: Wed, 12 Dec 2012 19:39:36 +0000
Subject: Fix indentation.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170005 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/LoopVectorize.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp')

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 749b664f53..cdd130f135 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -95,11 +95,10 @@ struct LoopVectorize : public LoopPass {
     // Check the function attribues to find out if this function should be
     // optimized for size.
     Function *F = L->getHeader()->getParent();
-    bool OptForSize =
-    F->getFnAttributes().hasAttribute(Attributes::OptimizeForSize);
+    Attributes::AttrVal SzAttr= Attributes::OptimizeForSize;
+    bool OptForSize = F->getFnAttributes().hasAttribute(SzAttr);
 
-    unsigned VF = CM.selectVectorizationFactor(OptForSize,
-                                               VectorizationFactor);
+    unsigned VF = CM.selectVectorizationFactor(OptForSize, VectorizationFactor);
 
     if (VF == 1) {
       DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
-- 
cgit v1.2.3-18-g5258


From 807dad62a0d4f1a1bbcb76fdc447634f76308252 Mon Sep 17 00:00:00 2001
From: Nadav Rotem <nrotem@apple.com>
Date: Thu, 13 Dec 2012 00:21:03 +0000
Subject: Teach the cost model about the optimization in r169904: Truncation of
 induction variables costs the same as scalar trunc.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170051 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/LoopVectorize.cpp | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp')

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index cdd130f135..475bea1485 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -106,8 +106,7 @@ struct LoopVectorize : public LoopPass {
     }
 
     DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF << ") in "<<
-          L->getHeader()->getParent()->getParent()->getModuleIdentifier()<<
-          "\n");
+          F->getParent()->getModuleIdentifier()<<"\n");
 
     // If we decided that it is *legal* to vectorizer the loop then do it.
     InnerLoopVectorizer LB(L, SE, LI, DT, DL, VF);
@@ -1849,6 +1848,15 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
   return NoInduction;
 }
 
+bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
+  Value *In0 = const_cast<Value*>(V);
+  PHINode *PN = dyn_cast_or_null<PHINode>(In0);
+  if (!PN)
+    return false;
+
+  return Inductions.count(PN);
+}
+
 bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB)  {
   assert(TheLoop->contains(BB) && "Unknown block used");
 
@@ -2110,6 +2118,13 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
   case Instruction::Trunc:
   case Instruction::FPTrunc:
   case Instruction::BitCast: {
+    // We optimize the truncation of induction variable.
+    // The cost of these is the same as the scalar operation.
+    if (I->getOpcode() == Instruction::Trunc &&
+        Legal->isInductionVariable(I->getOperand(0)))
+         return VTTI->getCastInstrCost(I->getOpcode(), I->getType(),
+                                       I->getOperand(0)->getType());
+
     Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
     return VTTI->getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
   }
-- 
cgit v1.2.3-18-g5258


From 9ad73e93a51c473a90e010a489a9c7c221112030 Mon Sep 17 00:00:00 2001
From: Nadav Rotem <nrotem@apple.com>
Date: Thu, 13 Dec 2012 23:11:54 +0000
Subject: Enable the Loop Vectorizer by default for O2 and O3. Disable
 if-conversion by default. I plan to revert this patch later today.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170157 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp')

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 475bea1485..d143f919ce 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -44,7 +44,7 @@ VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
                     cl::desc("Sets the SIMD width. Zero is autoselect."));
 
 static cl::opt<bool>
-EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
+EnableIfConversion("enable-if-conversion", cl::init(false), cl::Hidden,
                    cl::desc("Enable if-conversion during vectorization."));
 
 namespace {
-- 
cgit v1.2.3-18-g5258