author    | Chris Lattner <sabre@nondot.org> | 2010-02-11 06:26:33 +0000
committer | Chris Lattner <sabre@nondot.org> | 2010-02-11 06:26:33 +0000
commit    | 8c5ad3a5da7419af4d31963c3824836b5da52478
tree      | 49d26c55a8eed6bffd9233c7edd0b802a639ddc7
parent    | 2e1cdbf92dfee11fd326cede500a8a0e7e035e65
Rename ValueRequiresCast to ShouldOptimizeCast, to better reflect
what it does. Enhance it to return false for vector sign extensions
of vector comparisons, which are the idiom used to get a splatted
mask from a vector comparison.

Doing this breaks vector-casts.ll, so add some compensating
transformations to handle the important cases it covers without
depending on this canonicalization.

This fixes rdar://7434900, a serious pessimization of vector compares.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@95855 91177308-0d34-0410-b5e6-96231b3b80d8
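For context, the "splatted vector" idiom this change preserves looks roughly like the IR below. This is a hypothetical illustration, not part of the commit: a vector compare yields a <4 x i1>, and the adjacent sext splats each bit into a full 0/-1 lane, a pair that backends match as a single vector-compare instruction.

```llvm
; Hypothetical sketch (not from this commit) of the splatted-mask idiom.
define <4 x i32> @splat_mask(<4 x i32> %x) {
entry:
  ; One vector compare produces a <4 x i1> result...
  %cmp = icmp sgt <4 x i32> %x, zeroinitializer
  ; ...and the sext splats each bit into a whole lane: 0 or -1.
  ; Codegen matches this cmp+sext pair as one vector compare (e.g. pcmpgtd).
  %mask = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %mask
}
```

Sinking an and/or between the compare and the sext separates the pair, which is exactly the canonicalization ShouldOptimizeCast now declines.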
lib/Transforms/InstCombine/InstCombine.h           |  11
lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 118
lib/Transforms/InstCombine/InstCombineCasts.cpp    |  23
test/Transforms/InstCombine/vector-casts.ll        |  16
4 files changed, 113 insertions, 55 deletions
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 536790004e..09accb6b85 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -199,11 +199,12 @@ private:
                                SmallVectorImpl<Value*> &NewIndices);
   Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
 
-  /// ValueRequiresCast - Return true if the cast from "V to Ty" actually
-  /// results in any code being generated. It does not require codegen if V is
-  /// simple enough or if the cast can be folded into other casts.
-  bool ValueRequiresCast(Instruction::CastOps opcode,const Value *V,
-                         const Type *Ty);
+  /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
+  /// results in any code being generated and is interesting to optimize out. If
+  /// the cast can be eliminated by some other simple transformation, we prefer
+  /// to do the simplification first.
+  bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V,
+                          const Type *Ty);
 
   Instruction *visitCallSite(CallSite CS);
   bool transformConstExprCastCall(CallSite CS);
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 28fd70ef69..515753fae4 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -932,24 +932,49 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
     if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
       if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS))
         return Res;
-
+
+  // If and'ing two fcmp, try combine them into one.
+  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
+    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+      if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
+        return Res;
+
+
   // fold (and (cast A), (cast B)) -> (cast (and A, B))
   if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
-    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
-      if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ?
-        const Type *SrcTy = Op0C->getOperand(0)->getType();
-        if (SrcTy == Op1C->getOperand(0)->getType() &&
-            SrcTy->isIntOrIntVector() &&
-            // Only do this if the casts both really cause code to be generated.
-            ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
-                              I.getType()) &&
-            ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
-                              I.getType())) {
-          Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0),
-                                            Op1C->getOperand(0), I.getName());
+    if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) {
+      const Type *SrcTy = Op0C->getOperand(0)->getType();
+      if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ?
+          SrcTy == Op1C->getOperand(0)->getType() &&
+          SrcTy->isIntOrIntVector()) {
+        Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+
+        // Only do this if the casts both really cause code to be generated.
+        if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
+            ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
+          Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp, I.getName());
           return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
         }
+
+        // If this is and(cast(icmp), cast(icmp)), try to fold this even if the
+        // cast is otherwise not optimizable. This happens for vector sexts.
+        if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
+          if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
+            if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) {
+              InsertNewInstBefore(Res, I);
+              return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+            }
+
+        // If this is and(cast(fcmp), cast(fcmp)), try to fold this even if the
+        // cast is otherwise not optimizable. This happens for vector sexts.
+        if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
+          if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
+            if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) {
+              InsertNewInstBefore(Res, I);
+              return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+            }
       }
+    }
 
   // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts.
   if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
@@ -965,13 +990,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
     }
   }
 
-  // If and'ing two fcmp, try combine them into one.
-  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
-    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
-      if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
-        return Res;
-  }
-
   return Changed ? &I : 0;
 }
 
@@ -1669,37 +1687,51 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
       if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS))
         return Res;
 
+  // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
+  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
+    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+      if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS))
+        return Res;
+
   // fold (or (cast A), (cast B)) -> (cast (or A, B))
   if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
     if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
      if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
-        if (!isa<ICmpInst>(Op0C->getOperand(0)) ||
-            !isa<ICmpInst>(Op1C->getOperand(0))) {
-          const Type *SrcTy = Op0C->getOperand(0)->getType();
-          if (SrcTy == Op1C->getOperand(0)->getType() &&
-              SrcTy->isIntOrIntVector() &&
+        const Type *SrcTy = Op0C->getOperand(0)->getType();
+        if (SrcTy == Op1C->getOperand(0)->getType() &&
+            SrcTy->isIntOrIntVector()) {
+          Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+
+          if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) &&
              // Only do this if the casts both really cause code to be
              // generated.
-              ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
-                                I.getType()) &&
-              ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
-                                I.getType())) {
-            Value *NewOp = Builder->CreateOr(Op0C->getOperand(0),
-                                             Op1C->getOperand(0), I.getName());
+              ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
+              ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
+            Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName());
             return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
           }
+
+          // If this is or(cast(icmp), cast(icmp)), try to fold this even if the
+          // cast is otherwise not optimizable. This happens for vector sexts.
+          if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
+            if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
+              if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) {
+                InsertNewInstBefore(Res, I);
+                return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+              }
+
+          // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
+          // cast is otherwise not optimizable. This happens for vector sexts.
+          if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
+            if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
+              if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) {
+                InsertNewInstBefore(Res, I);
+                return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+              }
         }
      }
   }
-
-  // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
-  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
-    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
-      if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS))
-        return Res;
-  }
-
   return Changed ? &I : 0;
 }
 
@@ -1986,10 +2018,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
       const Type *SrcTy = Op0C->getOperand(0)->getType();
       if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
          // Only do this if the casts both really cause code to be generated.
-          ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
-                            I.getType()) &&
-          ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
-                            I.getType())) {
+          ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0),
+                             I.getType()) &&
+          ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0),
+                             I.getType())) {
         Value *NewOp = Builder->CreateXor(Op0C->getOperand(0),
                                           Op1C->getOperand(0), I.getName());
         return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 09cd21f144..68e17e59f6 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -255,17 +255,26 @@ isEliminableCastPair(
   return Instruction::CastOps(Res);
 }
 
-/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results
-/// in any code being generated. It does not require codegen if V is simple
-/// enough or if the cast can be folded into other casts.
-bool InstCombiner::ValueRequiresCast(Instruction::CastOps opcode,const Value *V,
-                                     const Type *Ty) {
+/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
+/// results in any code being generated and is interesting to optimize out. If
+/// the cast can be eliminated by some other simple transformation, we prefer
+/// to do the simplification first.
+bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
+                                      const Type *Ty) {
+  // Noop casts and casts of constants should be eliminated trivially.
   if (V->getType() == Ty || isa<Constant>(V)) return false;
 
-  // If this is another cast that can be eliminated, it isn't codegen either.
+  // If this is another cast that can be eliminated, we prefer to have it
+  // eliminated.
   if (const CastInst *CI = dyn_cast<CastInst>(V))
-    if (isEliminableCastPair(CI, opcode, Ty, TD))
+    if (isEliminableCastPair(CI, opc, Ty, TD))
      return false;
+
+  // If this is a vector sext from a compare, then we don't want to break the
+  // idiom where each element of the extended vector is either zero or all ones.
+  if (opc == Instruction::SExt && isa<CmpInst>(V) && isa<VectorType>(Ty))
+    return false;
+
   return true;
 }
 
diff --git a/test/Transforms/InstCombine/vector-casts.ll b/test/Transforms/InstCombine/vector-casts.ll
index 470d485475..24bd04dcb2 100644
--- a/test/Transforms/InstCombine/vector-casts.ll
+++ b/test/Transforms/InstCombine/vector-casts.ll
@@ -51,6 +51,22 @@ entry:
 }
 
 
+; rdar://7434900
+define <2 x i64> @test5(<4 x float> %a, <4 x float> %b) nounwind readnone {
+entry:
+  %cmp = fcmp ult <4 x float> %a, zeroinitializer
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %cmp4 = fcmp ult <4 x float> %b, zeroinitializer
+  %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
+  %and = and <4 x i32> %sext, %sext5
+  %conv = bitcast <4 x i32> %and to <2 x i64>
+  ret <2 x i64> %conv
+
+; CHECK: @test5
+; CHECK: sext <4 x i1> %cmp to <4 x i32>
+; CHECK: sext <4 x i1> %cmp4 to <4 x i32>
+}
+
 define void @convert(<2 x i32>* %dst.addr, <2 x i64> %src) nounwind {
 entry:
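To make the fix concrete, below is an assumed sketch (inferred from the old and(cast A, cast B) -> cast(and A, B) fold, not taken from the commit) of what the previous ValueRequiresCast behavior produced for @test5: the and was hoisted through both sexts onto the <4 x i1> masks, divorcing each fcmp from its sext so the backend could no longer match each cmp+sext pair to a single vector-compare instruction.

```llvm
; Assumed sketch, not from the commit: old canonicalized form of @test5.
define <2 x i64> @test5_old_canonical(<4 x float> %a, <4 x float> %b) nounwind readnone {
entry:
  %cmp  = fcmp ult <4 x float> %a, zeroinitializer
  %cmp4 = fcmp ult <4 x float> %b, zeroinitializer
  ; The 'and' now operates on an illegal <4 x i1> type, and the single sext
  ; is no longer adjacent to either compare.
  %and1 = and <4 x i1> %cmp, %cmp4
  %sext = sext <4 x i1> %and1 to <4 x i32>
  %conv = bitcast <4 x i32> %sext to <2 x i64>
  ret <2 x i64> %conv
}
```

With ShouldOptimizeCast declining this rewrite, the two cmp+sext pairs stay intact and the and happens on the legal <4 x i32> masks, while the new FoldAndOfICmps/FoldAndOfFCmps calls inside the cast fold still catch cases where the two compares can genuinely be merged into one.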