aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHal Finkel <hfinkel@anl.gov>2012-02-01 03:51:43 +0000
committerHal Finkel <hfinkel@anl.gov>2012-02-01 03:51:43 +0000
commitde5e5ec3045a73a06b1054417f9ac6c02929e9ce (patch)
tree02f43070729c0328db2d86e33acd94df3ab52d53
parentd0e277d272d517ca1cda368267d199f0da7cad95 (diff)
Add a basic-block autovectorization pass.
This is the initial checkin of the basic-block autovectorization pass along with some supporting vectorization infrastructure. Special thanks to everyone who helped review this code over the last several months (especially Tobias Grosser). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@149468 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--docs/Passes.html21
-rw-r--r--include/llvm-c/Initialization.h1
-rw-r--r--include/llvm-c/Transforms/Vectorize.h37
-rw-r--r--include/llvm/InitializePasses.h6
-rw-r--r--include/llvm/LinkAllPasses.h2
-rw-r--r--include/llvm/Transforms/IPO/PassManagerBuilder.h1
-rw-r--r--include/llvm/Transforms/Vectorize.h30
-rw-r--r--lib/Transforms/CMakeLists.txt1
-rw-r--r--lib/Transforms/IPO/LLVMBuild.txt2
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp14
-rw-r--r--lib/Transforms/LLVMBuild.txt2
-rw-r--r--lib/Transforms/Makefile2
-rw-r--r--lib/Transforms/Vectorize/BBVectorize.cpp1796
-rw-r--r--lib/Transforms/Vectorize/CMakeLists.txt4
-rw-r--r--lib/Transforms/Vectorize/LLVMBuild.txt24
-rw-r--r--lib/Transforms/Vectorize/Makefile15
-rw-r--r--lib/Transforms/Vectorize/Vectorize.cpp39
-rw-r--r--test/Transforms/BBVectorize/cycle.ll112
-rw-r--r--test/Transforms/BBVectorize/dg.exp3
-rw-r--r--test/Transforms/BBVectorize/ld1.ll41
-rw-r--r--test/Transforms/BBVectorize/loop1.ll93
-rw-r--r--test/Transforms/BBVectorize/req-depth.ll17
-rw-r--r--test/Transforms/BBVectorize/search-limit.ll46
-rw-r--r--test/Transforms/BBVectorize/simple-int.ll59
-rw-r--r--test/Transforms/BBVectorize/simple-ldstr.ll110
-rw-r--r--test/Transforms/BBVectorize/simple.ll152
-rw-r--r--tools/bugpoint/CMakeLists.txt2
-rw-r--r--tools/bugpoint/Makefile2
-rw-r--r--tools/llvm-ld/CMakeLists.txt2
-rw-r--r--tools/llvm-ld/Makefile2
-rw-r--r--tools/lto/CMakeLists.txt2
-rw-r--r--tools/lto/Makefile2
-rw-r--r--tools/opt/CMakeLists.txt2
-rw-r--r--tools/opt/Makefile2
-rw-r--r--tools/opt/opt.cpp1
35 files changed, 2635 insertions, 12 deletions
diff --git a/docs/Passes.html b/docs/Passes.html
index 5c42f3fdd5..840b2eff06 100644
--- a/docs/Passes.html
+++ b/docs/Passes.html
@@ -126,6 +126,7 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<tr><td><a href="#adce">-adce</a></td><td>Aggressive Dead Code Elimination</td></tr>
<tr><td><a href="#always-inline">-always-inline</a></td><td>Inliner for always_inline functions</td></tr>
<tr><td><a href="#argpromotion">-argpromotion</a></td><td>Promote 'by reference' arguments to scalars</td></tr>
+<tr><td><a href="#bb-vectorize">-bb-vectorize</a></td><td>Combine instructions to form vector instructions within basic blocks</td></tr>
<tr><td><a href="#block-placement">-block-placement</a></td><td>Profile Guided Basic Block Placement</td></tr>
<tr><td><a href="#break-crit-edges">-break-crit-edges</a></td><td>Break critical edges in CFG</td></tr>
<tr><td><a href="#codegenprepare">-codegenprepare</a></td><td>Optimize for code generation</td></tr>
@@ -817,6 +818,26 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<!-------------------------------------------------------------------------- -->
<h3>
+ <a name="bb-vectorize">-bb-vectorize: Basic-Block Vectorization</a>
+</h3>
+<div>
+ <p>This pass combines instructions inside basic blocks to form vector
+ instructions. It iterates over each basic block, attempting to pair
+ compatible instructions, repeating this process until no additional
+ pairs are selected for vectorization. When the outputs of some pair
+ of compatible instructions are used as inputs by some other pair of
+ compatible instructions, those pairs are part of a potential
+ vectorization chain. Instruction pairs are only fused into vector
+ instructions when they are part of a chain longer than some
+ threshold length. Moreover, the pass attempts to find the best
+ possible chain for each pair of compatible instructions. These
+ heuristics are intended to prevent vectorization in cases where
+ it would not yield a performance increase of the resulting code.
+ </p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<h3>
<a name="block-placement">-block-placement: Profile Guided Basic Block Placement</a>
</h3>
<div>
diff --git a/include/llvm-c/Initialization.h b/include/llvm-c/Initialization.h
index 3b59abbec0..cbe60df908 100644
--- a/include/llvm-c/Initialization.h
+++ b/include/llvm-c/Initialization.h
@@ -25,6 +25,7 @@ extern "C" {
void LLVMInitializeCore(LLVMPassRegistryRef R);
void LLVMInitializeTransformUtils(LLVMPassRegistryRef R);
void LLVMInitializeScalarOpts(LLVMPassRegistryRef R);
+void LLVMInitializeVectorization(LLVMPassRegistryRef R);
void LLVMInitializeInstCombine(LLVMPassRegistryRef R);
void LLVMInitializeIPO(LLVMPassRegistryRef R);
void LLVMInitializeInstrumentation(LLVMPassRegistryRef R);
diff --git a/include/llvm-c/Transforms/Vectorize.h b/include/llvm-c/Transforms/Vectorize.h
new file mode 100644
index 0000000000..178465ac82
--- /dev/null
+++ b/include/llvm-c/Transforms/Vectorize.h
@@ -0,0 +1,37 @@
+/*===---------------------------Vectorize.h ------------------- -*- C++ -*-===*\
+|*===----------- Vectorization Transformation Library C Interface ---------===*|
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This header declares the C interface to libLLVMVectorize.a, which *|
+|* implements various vectorization transformations of the LLVM IR. *|
+|* *|
+|* Many exotic languages can interoperate with C code but have a harder time *|
+|* with C++ due to name mangling. So in addition to C, this interface enables *|
+|* tools written in such languages. *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_TRANSFORMS_VECTORIZE_H
+#define LLVM_C_TRANSFORMS_VECTORIZE_H
+
+#include "llvm-c/Core.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** See llvm::createBBVectorizePass function. */
+void LLVMAddBBVectorizePass(LLVMPassManagerRef PM);
+
+#ifdef __cplusplus
+}
+#endif /* defined(__cplusplus) */
+
+#endif
+
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index 6cc3f195c7..ac129c77d5 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -31,6 +31,10 @@ void initializeTransformUtils(PassRegistry&);
/// ScalarOpts library.
void initializeScalarOpts(PassRegistry&);
+/// initializeVectorization - Initialize all passes linked into the
+/// Vectorize library.
+void initializeVectorization(PassRegistry&);
+
/// initializeInstCombine - Initialize all passes linked into the
/// ScalarOpts library.
void initializeInstCombine(PassRegistry&);
@@ -236,7 +240,7 @@ void initializeVirtRegMapPass(PassRegistry&);
void initializeInstSimplifierPass(PassRegistry&);
void initializeUnpackMachineBundlesPass(PassRegistry&);
void initializeFinalizeMachineBundlesPass(PassRegistry&);
-
+void initializeBBVectorizePass(PassRegistry&);
}
#endif
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index 537fab757b..2258d45ce9 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -31,6 +31,7 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Vectorize.h"
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
#include <cstdlib>
@@ -151,6 +152,7 @@ namespace {
(void) llvm::createCorrelatedValuePropagationPass();
(void) llvm::createMemDepPrinter();
(void) llvm::createInstructionSimplifierPass();
+ (void) llvm::createBBVectorizePass();
(void)new llvm::IntervalPartition();
(void)new llvm::FindUsedTypes();
diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h
index b4ba184781..a1b4f5cd90 100644
--- a/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -99,6 +99,7 @@ public:
bool DisableSimplifyLibCalls;
bool DisableUnitAtATime;
bool DisableUnrollLoops;
+ bool Vectorize;
private:
/// ExtensionList - This is list of all of the extensions that are registered.
diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h
new file mode 100644
index 0000000000..dfc099ddb9
--- /dev/null
+++ b/include/llvm/Transforms/Vectorize.h
@@ -0,0 +1,30 @@
+//===-- Vectorize.h - Vectorization Transformations -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file defines prototypes for accessor functions that expose passes
+// in the Vectorize transformations library.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_H
+#define LLVM_TRANSFORMS_VECTORIZE_H
+
+namespace llvm {
+
+class BasicBlockPass;
+
+//===----------------------------------------------------------------------===//
+//
+// BBVectorize - A basic-block vectorization pass.
+//
+BasicBlockPass *createBBVectorizePass();
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Transforms/CMakeLists.txt b/lib/Transforms/CMakeLists.txt
index 10e0cc6b56..de1353e6c1 100644
--- a/lib/Transforms/CMakeLists.txt
+++ b/lib/Transforms/CMakeLists.txt
@@ -3,4 +3,5 @@ add_subdirectory(Instrumentation)
add_subdirectory(InstCombine)
add_subdirectory(Scalar)
add_subdirectory(IPO)
+add_subdirectory(Vectorize)
add_subdirectory(Hello)
diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt
index b358fabdfa..b18c9150f4 100644
--- a/lib/Transforms/IPO/LLVMBuild.txt
+++ b/lib/Transforms/IPO/LLVMBuild.txt
@@ -20,4 +20,4 @@ type = Library
name = IPO
parent = Transforms
library_name = ipo
-required_libraries = Analysis Core IPA InstCombine Scalar Support Target TransformUtils
+required_libraries = Analysis Core IPA InstCombine Scalar Vectorize Support Target TransformUtils
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index afd25dc317..84084374b3 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -21,14 +21,20 @@
#include "llvm/DefaultPasses.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Vectorize.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
+static cl::opt<bool>
+RunVectorization("vectorize", cl::desc("Run vectorization passes"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -37,6 +43,7 @@ PassManagerBuilder::PassManagerBuilder() {
DisableSimplifyLibCalls = false;
DisableUnitAtATime = false;
DisableUnrollLoops = false;
+ Vectorize = RunVectorization;
}
PassManagerBuilder::~PassManagerBuilder() {
@@ -172,6 +179,13 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
+ if (Vectorize) {
+ MPM.add(createBBVectorizePass());
+ MPM.add(createInstructionCombiningPass());
+ if (OptLevel > 1)
+ MPM.add(createGVNPass()); // Remove redundancies
+ }
+
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createInstructionCombiningPass()); // Clean up after everything.
diff --git a/lib/Transforms/LLVMBuild.txt b/lib/Transforms/LLVMBuild.txt
index b2ef49a4c6..f7bca064c7 100644
--- a/lib/Transforms/LLVMBuild.txt
+++ b/lib/Transforms/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = IPO InstCombine Instrumentation Scalar Utils
+subdirectories = IPO InstCombine Instrumentation Scalar Utils Vectorize
[component_0]
type = Group
diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile
index e527be25de..8b1df92fa2 100644
--- a/lib/Transforms/Makefile
+++ b/lib/Transforms/Makefile
@@ -8,7 +8,7 @@
##===----------------------------------------------------------------------===##
LEVEL = ../..
-PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Hello
+PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Vectorize Hello
include $(LEVEL)/Makefile.config
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
new file mode 100644
index 0000000000..9c2c8dd51b
--- /dev/null
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -0,0 +1,1796 @@
+//===- BBVectorize.cpp - A Basic-Block Vectorizer -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a basic-block vectorization pass. The algorithm was
+// inspired by that used by the Vienna MAP Vectorizor by Franchetti and Kral,
+// et al. It works by looking for chains of pairable operations and then
+// pairing them.
+//
+//===----------------------------------------------------------------------===//
+
+#define BBV_NAME "bb-vectorize"
+#define DEBUG_TYPE BBV_NAME
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Vectorize.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+static cl::opt<unsigned>
+ReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), cl::Hidden,
+ cl::desc("The required chain depth for vectorization"));
+
+static cl::opt<unsigned>
+SearchLimit("bb-vectorize-search-limit", cl::init(400), cl::Hidden,
+ cl::desc("The maximum search distance for instruction pairs"));
+
+static cl::opt<bool>
+SplatBreaksChain("bb-vectorize-splat-breaks-chain", cl::init(false), cl::Hidden,
+ cl::desc("Replicating one element to a pair breaks the chain"));
+
+static cl::opt<unsigned>
+VectorBits("bb-vectorize-vector-bits", cl::init(128), cl::Hidden,
+ cl::desc("The size of the native vector registers"));
+
+static cl::opt<unsigned>
+MaxIter("bb-vectorize-max-iter", cl::init(0), cl::Hidden,
+ cl::desc("The maximum number of pairing iterations"));
+
+static cl::opt<unsigned>
+MaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200),
+ cl::Hidden, cl::desc("The maximum number of candidate pairs with which to use"
+ " a full cycle check"));
+
+static cl::opt<bool>
+NoInts("bb-vectorize-no-ints", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize integer values"));
+
+static cl::opt<bool>
+NoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize floating-point values"));
+
+static cl::opt<bool>
+NoCasts("bb-vectorize-no-casts", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize casting (conversion) operations"));
+
+static cl::opt<bool>
+NoMath("bb-vectorize-no-math", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize floating-point math intrinsics"));
+
+static cl::opt<bool>
+NoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize the fused-multiply-add intrinsic"));
+
+static cl::opt<bool>
+NoMemOps("bb-vectorize-no-mem-ops", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize loads and stores"));
+
+static cl::opt<bool>
+AlignedOnly("bb-vectorize-aligned-only", cl::init(false), cl::Hidden,
+ cl::desc("Only generate aligned loads and stores"));
+
+static cl::opt<bool>
+FastDep("bb-vectorize-fast-dep", cl::init(false), cl::Hidden,
+ cl::desc("Use a fast instruction dependency analysis"));
+
+#ifndef NDEBUG
+static cl::opt<bool>
+DebugInstructionExamination("bb-vectorize-debug-instruction-examination",
+ cl::init(false), cl::Hidden,
+ cl::desc("When debugging is enabled, output information on the"
+ " instruction-examination process"));
+static cl::opt<bool>
+DebugCandidateSelection("bb-vectorize-debug-candidate-selection",
+ cl::init(false), cl::Hidden,
+ cl::desc("When debugging is enabled, output information on the"
+ " candidate-selection process"));
+static cl::opt<bool>
+DebugPairSelection("bb-vectorize-debug-pair-selection",
+ cl::init(false), cl::Hidden,
+ cl::desc("When debugging is enabled, output information on the"
+ " pair-selection process"));
+static cl::opt<bool>
+DebugCycleCheck("bb-vectorize-debug-cycle-check",
+ cl::init(false), cl::Hidden,
+ cl::desc("When debugging is enabled, output information on the"
+ " cycle-checking process"));
+#endif
+
+STATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize");
+
+namespace {
+ struct BBVectorize : public BasicBlockPass {
+ static char ID; // Pass identification, replacement for typeid
+ BBVectorize() : BasicBlockPass(ID) {
+ initializeBBVectorizePass(*PassRegistry::getPassRegistry());
+ }
+
+ typedef std::pair<Value *, Value *> ValuePair;
+ typedef std::pair<ValuePair, size_t> ValuePairWithDepth;
+ typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair
+ typedef std::pair<std::multimap<Value *, Value *>::iterator,
+ std::multimap<Value *, Value *>::iterator> VPIteratorPair;
+ typedef std::pair<std::multimap<ValuePair, ValuePair>::iterator,
+ std::multimap<ValuePair, ValuePair>::iterator>
+ VPPIteratorPair;
+
+ AliasAnalysis *AA;
+ ScalarEvolution *SE;
+ TargetData *TD;
+
+ // FIXME: const correct?
+
+ bool vectorizePairs(BasicBlock &BB);
+
+ void getCandidatePairs(BasicBlock &BB,
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts);
+
+ void computeConnectedPairs(std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs);
+
+ void buildDepMap(BasicBlock &BB,
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &PairableInstUsers);
+
+ void choosePairs(std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<Value *, Value *>& ChosenPairs);
+
+ void fuseChosenPairs(BasicBlock &BB,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<Value *, Value *>& ChosenPairs);
+
+ bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore);
+
+ bool areInstsCompatible(Instruction *I, Instruction *J,
+ bool IsSimpleLoadStore);
+
+ bool trackUsesOfI(DenseSet<Value *> &Users,
+ AliasSetTracker &WriteSet, Instruction *I,
+ Instruction *J, bool UpdateUsers = true,
+ std::multimap<Value *, Value *> *LoadMoveSet = 0);
+
+ void computePairsConnectedTo(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ ValuePair P);
+
+ bool pairsConflict(ValuePair P, ValuePair Q,
+ DenseSet<ValuePair> &PairableInstUsers,
+ std::multimap<ValuePair, ValuePair> *PairableInstUserMap = 0);
+
+ bool pairWillFormCycle(ValuePair P,
+ std::multimap<ValuePair, ValuePair> &PairableInstUsers,
+ DenseSet<ValuePair> &CurrentPairs);
+
+ void pruneTreeFor(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<ValuePair, size_t> &Tree,
+ DenseSet<ValuePair> &PrunedTree, ValuePair J,
+ bool UseCycleCheck);
+
+ void buildInitialTreeFor(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<ValuePair, size_t> &Tree, ValuePair J);
+
+ void findBestTreeFor(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth,
+ size_t &BestEffSize, VPIteratorPair ChoiceRange,
+ bool UseCycleCheck);
+
+ Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I,
+ Instruction *J, unsigned o, bool &FlipMemInputs);
+
+ void fillNewShuffleMask(LLVMContext& Context, Instruction *J,
+ unsigned NumElem, unsigned MaskOffset, unsigned NumInElem,
+ unsigned IdxOffset, std::vector<Constant*> &Mask);
+
+ Value *getReplacementShuffleMask(LLVMContext& Context, Instruction *I,
+ Instruction *J);
+
+ Value *getReplacementInput(LLVMContext& Context, Instruction *I,
+ Instruction *J, unsigned o, bool FlipMemInputs);
+
+ void getReplacementInputsForPair(LLVMContext& Context, Instruction *I,
+ Instruction *J, SmallVector<Value *, 3> &ReplacedOperands,
+ bool &FlipMemInputs);
+
+ void replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
+ Instruction *J, Instruction *K,
+ Instruction *&InsertionPt, Instruction *&K1,
+ Instruction *&K2, bool &FlipMemInputs);
+
+ void collectPairLoadMoveSet(BasicBlock &BB,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ std::multimap<Value *, Value *> &LoadMoveSet,
+ Instruction *I);
+
+ void collectLoadMoveSet(BasicBlock &BB,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ std::multimap<Value *, Value *> &LoadMoveSet);
+
+ bool canMoveUsesOfIAfterJ(BasicBlock &BB,
+ std::multimap<Value *, Value *> &LoadMoveSet,
+ Instruction *I, Instruction *J);
+
+ void moveUsesOfIAfterJ(BasicBlock &BB,
+ std::multimap<Value *, Value *> &LoadMoveSet,
+ Instruction *&InsertionPt,
+ Instruction *I, Instruction *J);
+
+ virtual bool runOnBasicBlock(BasicBlock &BB) {
+ AA = &getAnalysis<AliasAnalysis>();
+ SE = &getAnalysis<ScalarEvolution>();
+ TD = getAnalysisIfAvailable<TargetData>();
+
+ bool changed = false;
+ // Iterate a sufficient number of times to merge types of size 1 bit,
+ // then 2 bits, then 4, etc. up to half of the target vector width of the
+ // target vector register.
+ for (unsigned v = 2, n = 1; v <= VectorBits && (!MaxIter || n <= MaxIter);
+ v *= 2, ++n) {
+ DEBUG(dbgs() << "BBV: fusing loop #" << n <<
+ " for " << BB.getName() << " in " <<
+ BB.getParent()->getName() << "...\n");
+ if (vectorizePairs(BB))
+ changed = true;
+ else
+ break;
+ }
+
+ DEBUG(dbgs() << "BBV: done!\n");
+ return changed;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ BasicBlockPass::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<ScalarEvolution>();
+ }
+
+ // This returns the vector type that holds a pair of the provided type.
+ // If the provided type is already a vector, then its length is doubled.
+ static inline VectorType *getVecTypeForPair(Type *ElemTy) {
+ if (VectorType *VTy = dyn_cast<VectorType>(ElemTy)) {
+ unsigned numElem = VTy->getNumElements();
+ return VectorType::get(ElemTy->getScalarType(), numElem*2);
+ } else {
+ return VectorType::get(ElemTy, 2);
+ }
+ }
+
+ // Returns the weight associated with the provided value. A chain of
+ // candidate pairs has a length given by the sum of the weights of its
+ // members (one weight per pair; the weight of each member of the pair
+ // is assumed to be the same). This length is then compared to the
+ // chain-length threshold to determine if a given chain is significant
+ // enough to be vectorized. The length is also used in comparing
+ // candidate chains where longer chains are considered to be better.
+ // Note: when this function returns 0, the resulting instructions are
+ // not actually fused.
+ static inline size_t getDepthFactor(Value *V) {
+ // InsertElement and ExtractElement have a depth factor of zero. This is
+ // for two reasons: First, they cannot be usefully fused. Second, because
+ // the pass generates a lot of these, they can confuse the simple metric
+ // used to compare the trees in the next iteration. Thus, giving them a
+ // weight of zero allows the pass to essentially ignore them in
+ // subsequent iterations when looking for vectorization opportunities
+ // while still tracking dependency chains that flow through those
+ // instructions.
+ if (isa<InsertElementInst>(V) || isa<ExtractElementInst>(V))
+ return 0;
+
+ return 1;
+ }
+
+ // This determines the relative offset of two loads or stores, returning
+ // true if the offset could be determined to be some constant value.
+ // For example, if OffsetInElmts == 1, then J accesses the memory directly
+ // after I; if OffsetInElmts == -1 then I accesses the memory
+ // directly after J. This function assumes that both instructions
+ // have the same type.
+ bool getPairPtrInfo(Instruction *I, Instruction *J,
+ Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment,
+ int64_t &OffsetInElmts) {
+ OffsetInElmts = 0;
+ if (isa<LoadInst>(I)) {
+ IPt