aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2010-02-11 06:26:33 +0000
committerChris Lattner <sabre@nondot.org>2010-02-11 06:26:33 +0000
commit8c5ad3a5da7419af4d31963c3824836b5da52478 (patch)
tree49d26c55a8eed6bffd9233c7edd0b802a639ddc7
parent2e1cdbf92dfee11fd326cede500a8a0e7e035e65 (diff)
Rename ValueRequiresCast to ShouldOptimizeCast, to better reflect
what it does. Enhance it to return false to optimizing vector sign extensions from vector comparisions, which is the idiom used to get a splatted vector for a vector comparison. Doing this breaks vector-casts.ll, add some compensating transformations to handle the important case they cover without depending on this canonicalization. This fixes rdar://7434900 a serious pessimization of vector compares. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@95855 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Transforms/InstCombine/InstCombine.h11
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp118
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp23
-rw-r--r--test/Transforms/InstCombine/vector-casts.ll16
4 files changed, 113 insertions, 55 deletions
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 536790004e..09accb6b85 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -199,11 +199,12 @@ private:
SmallVectorImpl<Value*> &NewIndices);
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
- /// ValueRequiresCast - Return true if the cast from "V to Ty" actually
- /// results in any code being generated. It does not require codegen if V is
- /// simple enough or if the cast can be folded into other casts.
- bool ValueRequiresCast(Instruction::CastOps opcode,const Value *V,
- const Type *Ty);
+ /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
+ /// results in any code being generated and is interesting to optimize out. If
+ /// the cast can be eliminated by some other simple transformation, we prefer
+ /// to do the simplification first.
+ bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V,
+ const Type *Ty);
Instruction *visitCallSite(CallSite CS);
bool transformConstExprCastCall(CallSite CS);
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 28fd70ef69..515753fae4 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -932,24 +932,49 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS))
return Res;
-
+
+ // If and'ing two fcmp, try combine them into one.
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+ if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
+ return Res;
+
+
// fold (and (cast A), (cast B)) -> (cast (and A, B))
if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
- if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
- if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ?
- const Type *SrcTy = Op0C->getOperand(0)->getType();
- if (SrcTy == Op1C->getOperand(0)->getType() &&
- SrcTy->isIntOrIntVector() &&
- // Only do this if the casts both really cause code to be generated.
- ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
- I.getType()) &&
- ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
- I.getType())) {
- Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0),
- Op1C->getOperand(0), I.getName());
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) {
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ?
+ SrcTy == Op1C->getOperand(0)->getType() &&
+ SrcTy->isIntOrIntVector()) {
+ Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+
+ // Only do this if the casts both really cause code to be generated.
+ if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
+ ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
+ Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp, I.getName());
return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
}
+
+ // If this is and(cast(icmp), cast(icmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
+ if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) {
+ InsertNewInstBefore(Res, I);
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+ }
+
+ // If this is and(cast(fcmp), cast(fcmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
+ if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) {
+ InsertNewInstBefore(Res, I);
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+ }
}
+ }
// (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts.
if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
@@ -965,13 +990,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
}
}
- // If and'ing two fcmp, try combine them into one.
- if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
- if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
- if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
- return Res;
- }
-
return Changed ? &I : 0;
}
@@ -1669,37 +1687,51 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS))
return Res;
+ // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+ if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS))
+ return Res;
+
// fold (or (cast A), (cast B)) -> (cast (or A, B))
if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
- if (!isa<ICmpInst>(Op0C->getOperand(0)) ||
- !isa<ICmpInst>(Op1C->getOperand(0))) {
- const Type *SrcTy = Op0C->getOperand(0)->getType();
- if (SrcTy == Op1C->getOperand(0)->getType() &&
- SrcTy->isIntOrIntVector() &&
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (SrcTy == Op1C->getOperand(0)->getType() &&
+ SrcTy->isIntOrIntVector()) {
+ Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+
+ if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) &&
// Only do this if the casts both really cause code to be
// generated.
- ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
- I.getType()) &&
- ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
- I.getType())) {
- Value *NewOp = Builder->CreateOr(Op0C->getOperand(0),
- Op1C->getOperand(0), I.getName());
+ ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
+ ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
+ Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName());
return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
}
+
+ // If this is or(cast(icmp), cast(icmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
+ if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) {
+ InsertNewInstBefore(Res, I);
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+ }
+
+ // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
+ if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) {
+ InsertNewInstBefore(Res, I);
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+ }
}
}
}
-
- // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
- if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
- if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
- if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS))
- return Res;
- }
-
return Changed ? &I : 0;
}
@@ -1986,10 +2018,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
const Type *SrcTy = Op0C->getOperand(0)->getType();
if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
// Only do this if the casts both really cause code to be generated.
- ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
- I.getType()) &&
- ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
- I.getType())) {
+ ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0),
+ I.getType()) &&
+ ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0),
+ I.getType())) {
Value *NewOp = Builder->CreateXor(Op0C->getOperand(0),
Op1C->getOperand(0), I.getName());
return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 09cd21f144..68e17e59f6 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -255,17 +255,26 @@ isEliminableCastPair(
return Instruction::CastOps(Res);
}
-/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results
-/// in any code being generated. It does not require codegen if V is simple
-/// enough or if the cast can be folded into other casts.
-bool InstCombiner::ValueRequiresCast(Instruction::CastOps opcode,const Value *V,
- const Type *Ty) {
+/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
+/// results in any code being generated and is interesting to optimize out. If
+/// the cast can be eliminated by some other simple transformation, we prefer
+/// to do the simplification first.
+bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
+ const Type *Ty) {
+ // Noop casts and casts of constants should be eliminated trivially.
if (V->getType() == Ty || isa<Constant>(V)) return false;
- // If this is another cast that can be eliminated, it isn't codegen either.
+ // If this is another cast that can be eliminated, we prefer to have it
+ // eliminated.
if (const CastInst *CI = dyn_cast<CastInst>(V))
- if (isEliminableCastPair(CI, opcode, Ty, TD))
+ if (isEliminableCastPair(CI, opc, Ty, TD))
return false;
+
+ // If this is a vector sext from a compare, then we don't want to break the
+ // idiom where each element of the extended vector is either zero or all ones.
+ if (opc == Instruction::SExt && isa<CmpInst>(V) && isa<VectorType>(Ty))
+ return false;
+
return true;
}
diff --git a/test/Transforms/InstCombine/vector-casts.ll b/test/Transforms/InstCombine/vector-casts.ll
index 470d485475..24bd04dcb2 100644
--- a/test/Transforms/InstCombine/vector-casts.ll
+++ b/test/Transforms/InstCombine/vector-casts.ll
@@ -51,6 +51,22 @@ entry:
}
+; rdar://7434900
+define <2 x i64> @test5(<4 x float> %a, <4 x float> %b) nounwind readnone {
+entry:
+ %cmp = fcmp ult <4 x float> %a, zeroinitializer
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %cmp4 = fcmp ult <4 x float> %b, zeroinitializer
+ %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
+ %and = and <4 x i32> %sext, %sext5
+ %conv = bitcast <4 x i32> %and to <2 x i64>
+ ret <2 x i64> %conv
+
+; CHECK: @test5
+; CHECK: sext <4 x i1> %cmp to <4 x i32>
+; CHECK: sext <4 x i1> %cmp4 to <4 x i32>
+}
+
define void @convert(<2 x i32>* %dst.addr, <2 x i64> %src) nounwind {
entry: