From 5da804150d418b8b4956b84013f8f67df553c543 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 12 Dec 2012 01:11:46 +0000 Subject: LoopVectorizer: When -Os is used, vectorize only loops that dont require a tail loop. There is no testcase because I dont know of a way to initialize the loop vectorizer pass without adding an additional hidden flag. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169950 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/Vectorize.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Transforms/Vectorize/Vectorize.cpp') diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp index 3fb36cadea..cf7d4ee8b2 100644 --- a/lib/Transforms/Vectorize/Vectorize.cpp +++ b/lib/Transforms/Vectorize/Vectorize.cpp @@ -1,4 +1,4 @@ -//===-- Vectorize.cpp -----------------------------------------------------===// + //===-- Vectorize.cpp -----------------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -39,5 +39,5 @@ void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) { } void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createLoopVectorizePass()); + unwrap(PM)->add(createLoopVectorizePass(0)); } -- cgit v1.2.3-70-g09d2 From ae3b652f5cc19d83b6466d4fa70a7d1c7fb6d06c Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 12 Dec 2012 19:29:45 +0000 Subject: LoopVectorizer: Use the "optsize" attribute to decide if we are allowed to increase the function size. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170004 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/LinkAllPasses.h | 2 +- include/llvm/Transforms/Vectorize.h | 2 +- lib/Transforms/IPO/PassManagerBuilder.cpp | 2 +- lib/Transforms/Vectorize/LoopVectorize.cpp | 17 ++- lib/Transforms/Vectorize/Vectorize.cpp | 2 +- test/Transforms/LoopVectorize/small-size.ll | 170 ++++++++++++++++++++++++++++ 6 files changed, 185 insertions(+), 10 deletions(-) create mode 100644 test/Transforms/LoopVectorize/small-size.ll (limited to 'lib/Transforms/Vectorize/Vectorize.cpp') diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index 48d7c40642..baf8550edc 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -156,7 +156,7 @@ namespace { (void) llvm::createCorrelatedValuePropagationPass(); (void) llvm::createMemDepPrinter(); (void) llvm::createInstructionSimplifierPass(); - (void) llvm::createLoopVectorizePass(0); + (void) llvm::createLoopVectorizePass(); (void) llvm::createBBVectorizePass(); (void)new llvm::IntervalPartition(); diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h index 81864f32ef..1ba4d22d5f 100644 --- a/include/llvm/Transforms/Vectorize.h +++ b/include/llvm/Transforms/Vectorize.h @@ -111,7 +111,7 @@ createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig()); // // LoopVectorize - Create a loop vectorization pass. // -Pass *createLoopVectorizePass(bool OptForSize); +Pass *createLoopVectorizePass(); //===----------------------------------------------------------------------===// /// @brief Vectorize the BasicBlock. diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 0862786127..a9a9f2eece 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -189,7 +189,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createLoopDeletionPass()); // Delete dead loops if (LoopVectorize && OptLevel > 1) - MPM.add(createLoopVectorizePass(SizeLevel)); + MPM.add(createLoopVectorizePass()); if (!DisableUnrollLoops) MPM.add(createLoopUnrollPass()); // Unroll small loops diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index da073c5e59..749b664f53 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -53,10 +53,8 @@ namespace { struct LoopVectorize : public LoopPass { /// Pass identification, replacement for typeid static char ID; - /// Optimize for size. Do not generate tail loops. - bool OptForSize; - explicit LoopVectorize(bool OptSz = false) : LoopPass(ID), OptForSize(OptSz) { + explicit LoopVectorize() : LoopPass(ID) { initializeLoopVectorizePass(*PassRegistry::getPassRegistry()); } @@ -93,8 +91,15 @@ struct LoopVectorize : public LoopPass { VTTI = TTI->getVectorTargetTransformInfo(); // Use the cost model. LoopVectorizationCostModel CM(L, SE, &LVL, VTTI); + + // Check the function attribues to find out if this function should be + // optimized for size. + Function *F = L->getHeader()->getParent(); + bool OptForSize = + F->getFnAttributes().hasAttribute(Attributes::OptimizeForSize); + unsigned VF = CM.selectVectorizationFactor(OptForSize, - VectorizationFactor); + VectorizationFactor); if (VF == 1) { DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); @@ -2159,8 +2164,8 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false) namespace llvm { - Pass *createLoopVectorizePass(bool OptForSize = false) { - return new LoopVectorize(OptForSize); + Pass *createLoopVectorizePass() { + return new LoopVectorize(); } } diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp index cf7d4ee8b2..19eefd2f87 100644 --- a/lib/Transforms/Vectorize/Vectorize.cpp +++ b/lib/Transforms/Vectorize/Vectorize.cpp @@ -39,5 +39,5 @@ void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) { } void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createLoopVectorizePass(0)); + unwrap(PM)->add(createLoopVectorizePass()); } diff --git a/test/Transforms/LoopVectorize/small-size.ll b/test/Transforms/LoopVectorize/small-size.ll new file mode 100644 index 0000000000..deb0bb2f87 --- /dev/null +++ b/test/Transforms/LoopVectorize/small-size.ll @@ -0,0 +1,170 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@b = common global [2048 x i32] zeroinitializer, align 16 +@c = common global [2048 x i32] zeroinitializer, align 16 +@a = common global [2048 x i32] zeroinitializer, align 16 +@G = common global [32 x [1024 x i32]] zeroinitializer, align 16 +@ub = common global [1024 x i32] zeroinitializer, align 16 +@uc = common global [1024 x i32] zeroinitializer, align 16 +@d = common global [2048 x i32] zeroinitializer, align 16 +@fa = common global [1024 x float] zeroinitializer, align 16 +@fb = common global [1024 x float] zeroinitializer, align 16 +@ic = common global [1024 x i32] zeroinitializer, align 16 +@da = common global [1024 x float] zeroinitializer, align 16 +@db = common global [1024 x float] zeroinitializer, align 16 +@dc = common global [1024 x float] zeroinitializer, align 16 +@dd = common global [1024 x float] zeroinitializer, align 16 +@dj = common global [1024 x i32] zeroinitializer, align 16 + +; We can optimize this test without a tail. +;CHECK: @example1 +;CHECK: load <4 x i32> +;CHECK: add nsw <4 x i32> +;CHECK: store <4 x i32> +;CHECK: ret void +define void @example1() optsize { + br label %1 + +;