aboutsummaryrefslogtreecommitdiff
path: root/lib/Transforms/Vectorize/LoopVectorize.cpp
diff options
context:
space:
mode:
authorPaul Redmond <paul.redmond@intel.com>2012-12-09 20:42:17 +0000
committerPaul Redmond <paul.redmond@intel.com>2012-12-09 20:42:17 +0000
commit880166684e5af0f5b4bfe26870b9f7813e537354 (patch)
tree51c6a6d48732a0ddf166bb34ac207b59b172d403 /lib/Transforms/Vectorize/LoopVectorize.cpp
parent855d0255d0bc388da7554d05f8cf184e26f5a00d (diff)
LoopVectorize: support vectorizing intrinsic calls
- added function to VectorTargetTransformInfo to query cost of intrinsics
- vectorize trivially vectorizable intrinsic calls such as sin, cos, log, etc.

Reviewed by: Nadav

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169711 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp57
1 files changed, 55 insertions, 2 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 66ac0b4b5e..c93c2bfa91 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -62,6 +62,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
@@ -1051,6 +1052,35 @@ getReductionIdentity(LoopVectorizationLegality::ReductionKind K) {
}
}
+/// Return true if \p Inst is a call to one of the intrinsics listed in the
+/// switch below (sqrt, sin, cos, exp, exp2, log, log10, log2, fabs, floor,
+/// ceil, trunc, rint, nearbyint, pow, fma).
+///
+/// Returns false when \p Inst is not an IntrinsicInst at all, or when it is
+/// an intrinsic whose ID is not in the list.
+static bool
+isTriviallyVectorizableIntrinsic(Instruction *Inst) {
+  // Anything that is not an intrinsic call is rejected up front.
+  IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
+  if (!II)
+    return false;
+
+  switch (II->getIntrinsicID()) {
+  case Intrinsic::sqrt:
+  case Intrinsic::sin:
+  case Intrinsic::cos:
+  case Intrinsic::exp:
+  case Intrinsic::exp2:
+  case Intrinsic::log:
+  case Intrinsic::log10:
+  case Intrinsic::log2:
+  case Intrinsic::fabs:
+  case Intrinsic::floor:
+  case Intrinsic::ceil:
+  case Intrinsic::trunc:
+  case Intrinsic::rint:
+  case Intrinsic::nearbyint:
+  case Intrinsic::pow:
+  case Intrinsic::fma:
+    return true;
+  default:
+    // Every path through the switch returns, so no trailing return is
+    // needed after it.
+    return false;
+  }
+}
+
void
InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
//===------------------------------------------------===//
@@ -1509,8 +1539,22 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
break;
}
+ case Instruction::Call: {
+ assert(isTriviallyVectorizableIntrinsic(it));
+ Module *M = BB->getParent()->getParent();
+ IntrinsicInst *II = cast<IntrinsicInst>(it);
+ Intrinsic::ID ID = II->getIntrinsicID();
+ SmallVector<Value*, 4> Args;
+ for (unsigned i = 0, ie = II->getNumArgOperands(); i != ie; ++i)
+ Args.push_back(getVectorValue(II->getArgOperand(i)));
+ Type *Tys[] = { VectorType::get(II->getType()->getScalarType(), VF) };
+ Function *F = Intrinsic::getDeclaration(M, ID, Tys);
+ WidenMap[it] = Builder.CreateCall(F, Args);
+ break;
+ }
+
default:
- /// All other instructions are unsupported. Scalarize them.
+ // All other instructions are unsupported. Scalarize them.
scalarizeInstruction(it);
break;
}// end of switch.
@@ -1706,7 +1750,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// We still don't handle functions.
CallInst *CI = dyn_cast<CallInst>(it);
- if (CI) {
+ if (CI && !isTriviallyVectorizableIntrinsic(it)) {
DEBUG(dbgs() << "LV: Found a call site.\n");
return false;
}
@@ -2326,6 +2370,15 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
return VTTI->getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
}
+ case Instruction::Call: {
+ assert(isTriviallyVectorizableIntrinsic(I));
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ Type *RetTy = ToVectorTy(II->getType(), VF);
+ SmallVector<Type*, 4> Tys;
+ for (unsigned i = 0, ie = II->getNumArgOperands(); i != ie; ++i)
+ Tys.push_back(ToVectorTy(II->getArgOperand(i)->getType(), VF));
+ return VTTI->getIntrinsicInstrCost(II->getIntrinsicID(), RetTy, Tys);
+ }
default: {
// We are scalarizing the instruction. Return the cost of the scalar
// instruction, plus the cost of insert and extract into vector