diff options
author | Chris Lattner <sabre@nondot.org> | 2010-01-02 08:12:04 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2010-01-02 08:12:04 +0000 |
commit | 1f12e44b628679d003e99adc70e1ea0464e1971e (patch) | |
tree | 87cd85b662fec6d68c4b670c94c06699ad3c12c7 /lib/Transforms/Scalar/InstructionCombining.cpp | |
parent | 79fa3cf451a74dd1cec0be2d77d7ea9737dffc69 (diff) |
Teach instcombine to fold compares of loads from constant
arrays with variable indices into a comparison of the index
with a constant. The most common occurrence of this that
I see by far is stuff like:
if ("foobar"[i] == '\0') ...
which we compile into: if (i == 6), saving a load and
materialization of the global address. This also exposes
loop trip count information to later passes in many cases.
This triggers hundreds of times in xalancbmk, which is where I first
noticed it, but it also triggers in many other apps. Here are a few
interesting ones from various apps:
@must_be_connected_without = internal constant [8 x i8*] [i8* getelementptr inbounds ([3 x i8]* @.str64320, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8]* @.str27283, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str71327, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str72328, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8]* @.str18274, i64 0, i64 0), i8* getelementptr inbounds ([6 x i8]* @.str11267, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8]* @.str32288, i64 0, i64 0), i8* null], align 32 ; <[8 x i8*]*> [#uses=2]
%scevgep.i = getelementptr [8 x i8*]* @must_be_connected_without, i64 0, i64 %indvar.i ; <i8**> [#uses=1]
%17 = load ...
%18 = icmp eq i8* %17, null ; <i1> [#uses=1]
-> icmp eq i64 %indvar.i, 7
@yytable1095 = internal constant [84 x i8] c"\12\01(\05\06\07\08\09\0A\0B\0C\0D\0E1\0F\10\11266\1D: \10\11,-,0\03'\10\11B6\04\17&\18\1945\05\06\07\08\09\0A\0B\0C\0D\0E\1E\0F\10\11*\1A\1B\1C$3+>#%;<IJ=ADFEGH9KL\00\00\00C", align 32 ; <[84 x i8]*> [#uses=2]
%57 = getelementptr inbounds [84 x i8]* @yytable1095, i64 0, i64 %56 ; <i8*> [#uses=1]
%mode.0.in = getelementptr inbounds [9 x i32]* @mb_mode_table, i64 0, i64 %.pn ; <i32*> [#uses=1]
load ...
%64 = icmp eq i8 %58, 4 ; <i1> [#uses=1]
-> icmp eq i64 %.pn, 35 ; <i1> [#uses=0]
@gsm_DLB = internal constant [4 x i16] [i16 6554, i16 16384, i16 26214, i16 32767]
%scevgep.i = getelementptr [4 x i16]* @gsm_DLB, i64 0, i64 %indvar.i ; <i16*> [#uses=1]
%425 = load %scevgep.i
%426 = icmp eq i16 %425, -32768 ; <i1> [#uses=0]
-> false
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@92411 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms/Scalar/InstructionCombining.cpp')
-rw-r--r-- | lib/Transforms/Scalar/InstructionCombining.cpp | 121 |
1 files changed, 120 insertions, 1 deletions
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index 2a0df31a5f..1fcc64efe9 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -258,6 +258,8 @@ namespace { Instruction *commonShiftTransforms(BinaryOperator &I); Instruction *FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI, Constant *RHSC); + Instruction *FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, + GlobalVariable *GV, CmpInst &ICI); Instruction *visitFCmpInst(FCmpInst &I); Instruction *visitICmpInst(ICmpInst &I); Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI); @@ -6015,6 +6017,112 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt); } + +/// FoldCmpLoadFromIndexedGlobal - Called we see this pattern: +/// cmp pred (load (gep GV, ...)), cmpcst +/// where GV is a global variable with a constant initializer. Try to simplify +/// this into one or two simpler comparisons that do not need the load. For +/// example, we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into +/// "icmp eq i, 3". We assume that eliminating a load is always goodness. +Instruction *InstCombiner:: +FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, + CmpInst &ICI) { + + // There are many forms of this optimization we can handle, for now, just do + // the simple index into a single-dimensional array. + // + // Require: GEP GV, 0, i + if (GEP->getNumOperands() != 3 || + !isa<ConstantInt>(GEP->getOperand(1)) || + !cast<ConstantInt>(GEP->getOperand(1))->isZero()) + return 0; + + ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer()); + if (Init == 0 || Init->getNumOperands() > 1024) return 0; + + + // Variables for our state machines. + + // OnlyTrueElement - Used to emit a comparison of "i == 47", where 47 is the + // only index the condition is true for. The values are -1 -> undef, + // -2 -> overdef, >= 0 -> that index is true. + int OnlyTrueElement = -1; + + // OnlyFalseElement - Used to emit a comparison of "i != 47", where 47 is the + // only index the condition is false for. The values are -1 -> undef, + // -2 -> overdef, >= 0 -> that index is false. + int OnlyFalseElement = -1; + + // Scan the array and see if one of our patterns matches. + Constant *CompareRHS = cast<Constant>(ICI.getOperand(1)); + for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) { + // Find out if the comparison would be true or false for the i'th element. + Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), + Init->getOperand(i), + CompareRHS, TD); + // If the result is undef for this element, ignore it. + if (isa<UndefValue>(C)) continue; + + // If we can't compute the result for any of the elements, we have to give + // up evaluating the entire conditional. + if (!isa<ConstantInt>(C)) return 0; + + // Otherwise, we know if the comparison is true or false for this element, + // update our state machines. + bool IsTrueForElt = !cast<ConstantInt>(C)->isZero(); + + // State machine for single index comparison. + if (IsTrueForElt) { + // If undefined -> defined. Otherwise -> overdefined. + OnlyTrueElement = OnlyTrueElement == -1 ? i : -2; + } else { + // If undefined -> defined. Otherwise -> overdefined. + OnlyFalseElement = OnlyFalseElement == -1 ? i : -2; + } + + // If all of our states become overdefined, bail out early. + if (OnlyTrueElement == -2 && OnlyFalseElement == -2) + return 0; + } + + // Now that we've scanned the entire array, emit our new comparison(s). We + // order the state machines in complexity of the generated code. + if (OnlyTrueElement != -2) { + // None true -> false. + if (OnlyTrueElement == -1) + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context)); + + // True for one element -> 'i == 47'. + return new ICmpInst(ICmpInst::ICMP_EQ, GEP->getOperand(2), + ConstantInt::get(GEP->getOperand(2)->getType(), + OnlyTrueElement)); + } + + if (OnlyFalseElement != -2) { + // None false -> true. + if (OnlyFalseElement == -1) + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context)); + + return new ICmpInst(ICmpInst::ICMP_NE, GEP->getOperand(2), + ConstantInt::get(GEP->getOperand(2)->getType(), + OnlyFalseElement)); + } + + assert(0 && "Should have bailed out early"); + + // TODO: FCMP. + + // TODO: Range check. + + // TODO: If the global array has 32 (or 64 if native!) or less entries, we + // can turn this into something like: + // ((magicbitconstant >> i) & 1) != 0) + // where we populate magicbitconstant with 0101010 based on the comparison + // results. + return 0; +} + + Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { bool Changed = false; @@ -6175,7 +6283,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Value *A = 0, *B = 0; // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B) - if (I.isEquality() && CI->isNullValue() && + if (I.isEquality() && CI->isZero() && match(Op0, m_Sub(m_Value(A), m_Value(B)))) { // (icmp cond A B) if cond is equality return new ICmpInst(I.getPredicate(), A, B); @@ -6475,6 +6583,17 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), Constant::getNullValue(LHSI->getOperand(0)->getType())); break; + + case Instruction::Load: + if (GetElementPtrInst *GEP = + dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) + if (GV->isConstant() && GV->hasDefinitiveInitializer() && + !cast<LoadInst>(LHSI)->isVolatile()) { + if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I)) + return Res; + } + break; } } |