aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Hal Finkel <hfinkel@anl.gov> 2012-12-26 01:36:57 +0000
committer Hal Finkel <hfinkel@anl.gov> 2012-12-26 01:36:57 +0000
commit a77728415857196035c0090f7b2749d7971811a2 (patch)
tree 2287763ac8dcffdaf6c91d091d1bb04e84ddf4f0
parent 0f77910e6fb2c7fccc9643df8b6859c742d678a2 (diff)
BBVectorize: Use VTTI to compute costs for intrinsics vectorization
For the time being this includes only some dummy test cases. Once the generic implementation of the intrinsics cost function does something other than assuming scalarization in all cases, or some target specializes the interface, some real test cases can be added.

Also, for consistency, I changed the type of IID from unsigned to Intrinsic::ID in a few other places.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171079 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Transforms/Vectorize/BBVectorize.cpp 76
-rw-r--r-- test/Transforms/BBVectorize/X86/simple-int.ll 79
2 files changed, 143 insertions, 12 deletions
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index 4a2c878195..f9e9b3fbc4 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -642,7 +642,7 @@ namespace {
Function *F = I->getCalledFunction();
if (!F) return false;
- unsigned IID = F->getIntrinsicID();
+ Intrinsic::ID IID = (Intrinsic::ID) F->getIntrinsicID();
if (!IID) return false;
switch(IID) {
@@ -1020,14 +1020,67 @@ namespace {
// vectorized, the second arguments must be equal.
CallInst *CI = dyn_cast<CallInst>(I);
Function *FI;
- if (CI && (FI = CI->getCalledFunction()) &&
- FI->getIntrinsicID() == Intrinsic::powi) {
-
- Value *A1I = CI->getArgOperand(1),
- *A1J = cast<CallInst>(J)->getArgOperand(1);
- const SCEV *A1ISCEV = SE->getSCEV(A1I),
- *A1JSCEV = SE->getSCEV(A1J);
- return (A1ISCEV == A1JSCEV);
+ if (CI && (FI = CI->getCalledFunction())) {
+ Intrinsic::ID IID = (Intrinsic::ID) FI->getIntrinsicID();
+ if (IID == Intrinsic::powi) {
+ Value *A1I = CI->getArgOperand(1),
+ *A1J = cast<CallInst>(J)->getArgOperand(1);
+ const SCEV *A1ISCEV = SE->getSCEV(A1I),
+ *A1JSCEV = SE->getSCEV(A1J);
+ return (A1ISCEV == A1JSCEV);
+ }
+
+ if (IID && VTTI) {
+ SmallVector<Type*, 4> Tys;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
+ Tys.push_back(CI->getArgOperand(i)->getType());
+ unsigned ICost = VTTI->getIntrinsicInstrCost(IID, IT1, Tys);
+
+ Tys.clear();
+ CallInst *CJ = cast<CallInst>(J);
+ for (unsigned i = 0, ie = CJ->getNumArgOperands(); i != ie; ++i)
+ Tys.push_back(CJ->getArgOperand(i)->getType());
+ unsigned JCost = VTTI->getIntrinsicInstrCost(IID, JT1, Tys);
+
+ Tys.clear();
+ assert(CI->getNumArgOperands() == CJ->getNumArgOperands() &&
+ "Intrinsic argument counts differ");
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+ if (IID == Intrinsic::powi && i == 1)
+ Tys.push_back(CI->getArgOperand(i)->getType());
+ else
+ Tys.push_back(getVecTypeForPair(CI->getArgOperand(i)->getType(),
+ CJ->getArgOperand(i)->getType()));
+ }
+
+ Type *RetTy = getVecTypeForPair(IT1, JT1);
+ unsigned VCost = VTTI->getIntrinsicInstrCost(IID, RetTy, Tys);
+
+ if (VCost > ICost + JCost)
+ return false;
+
+ // We don't want to fuse to a type that will be split, even
+ // if the two input types will also be split and there is no other
+ // associated cost.
+ unsigned RetParts = VTTI->getNumberOfParts(RetTy);
+ if (RetParts > 1)
+ return false;
+ else if (!RetParts && VCost == ICost + JCost)
+ return false;
+
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+ if (!Tys[i]->isVectorTy())
+ continue;
+
+ unsigned NumParts = VTTI->getNumberOfParts(Tys[i]);
+ if (NumParts > 1)
+ return false;
+ else if (!NumParts && VCost == ICost + JCost)
+ return false;
+ }
+
+ CostSavings = ICost + JCost - VCost;
+ }
}
return true;
@@ -2551,7 +2604,7 @@ namespace {
continue;
} else if (isa<CallInst>(I)) {
Function *F = cast<CallInst>(I)->getCalledFunction();
- unsigned IID = F->getIntrinsicID();
+ Intrinsic::ID IID = (Intrinsic::ID) F->getIntrinsicID();
if (o == NumOperands-1) {
BasicBlock &BB = *I->getParent();
@@ -2560,8 +2613,7 @@ namespace {
Type *ArgTypeJ = J->getType();
Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
- ReplacedOperands[o] = Intrinsic::getDeclaration(M,
- (Intrinsic::ID) IID, VArgType);
+ ReplacedOperands[o] = Intrinsic::getDeclaration(M, IID, VArgType);
continue;
} else if (IID == Intrinsic::powi && o == 1) {
// The second argument of powi is a single integer and we've already
diff --git a/test/Transforms/BBVectorize/X86/simple-int.ll b/test/Transforms/BBVectorize/X86/simple-int.ll
new file mode 100644
index 0000000000..f5dbe46b14
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/simple-int.ll
@@ -0,0 +1,79 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+declare double @llvm.fma.f64(double, double, double)
+declare double @llvm.fmuladd.f64(double, double, double)
+declare double @llvm.cos.f64(double)
+declare double @llvm.powi.f64(double, i32)
+
+; Basic depth-3 chain with fma
+define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1)
+ %Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test1
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with fmuladd
+define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1)
+ %Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test1a
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with cos
+define double @test2(double %A1, double %A2, double %B1, double %B2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.cos.f64(double %X1)
+ %Y2 = call double @llvm.cos.f64(double %X2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test2
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with powi
+define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
+ %Y2 = call double @llvm.powi.f64(double %X2, i32 %P)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test3
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with powi (different powers: should not vectorize)
+define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %P2 = add i32 %P, 1
+ %Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
+ %Y2 = call double @llvm.powi.f64(double %X2, i32 %P2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test4
+; CHECK: ret double %R
+}
+