diff options
author | Benjamin Kramer <benny.kra@googlemail.com> | 2011-01-30 16:38:43 +0000 |
---|---|---|
committer | Benjamin Kramer <benny.kra@googlemail.com> | 2011-01-30 16:38:43 +0000 |
commit | 9b108a338d544a6baf2ff087055326e301e6815d (patch) | |
tree | 345e4c3631032d90ad172e06bc26fef3c0b3ebf3 | |
parent | bb25e2c91b79fc31103510860e1817863a674bc5 (diff) |
Teach DAGCombine to fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2)) when c1 equals the number of bits that are truncated off.
This happens all the time when a smul is promoted to a larger type.
On x86-64 we now compile "int test(int x) { return x/10; }" into
movslq %edi, %rax
imulq $1717986919, %rax, %rax
movq %rax, %rcx
shrq $63, %rcx
sarq $34, %rax <- used to be "shrq $32, %rax; sarl $2, %eax"
addl %ecx, %eax
This fires 96 times in gcc.c on x86-64.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@124559 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 23 | ||||
-rw-r--r-- | lib/Target/README.txt | 21 | ||||
-rw-r--r-- | test/CodeGen/X86/divide-by-constant.ll | 9 |
3 files changed, 32 insertions, 21 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a5b2d9594d..94487d4041 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3154,6 +3154,29 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { } } + // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2)) + // if c1 is equal to the number of bits the trunc removes + if (N0.getOpcode() == ISD::TRUNCATE && + (N0.getOperand(0).getOpcode() == ISD::SRL || + N0.getOperand(0).getOpcode() == ISD::SRA) && + N0.getOperand(0).hasOneUse() && + N0.getOperand(0).getOperand(1).hasOneUse() && + N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) { + EVT LargeVT = N0.getOperand(0).getValueType(); + ConstantSDNode *LargeShiftAmt = + cast<ConstantSDNode>(N0.getOperand(0).getOperand(1)); + + if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits == + LargeShiftAmt->getZExtValue()) { + SDValue Amt = + DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(), + getShiftAmountTy()); + SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT, + N0.getOperand(0).getOperand(0), Amt); + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA); + } + } + // Simplify, based on bits shifted out of the LHS. if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 8b5c5ce137..c0a2b760de 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -2274,24 +2274,3 @@ llc time when it gets inlined, because we can use smaller transfers. This also avoids partial register stalls in some important cases. //===---------------------------------------------------------------------===// - -We miss an optzn when lowering divide by some constants. 
For example: - int test(int x) { return x/10; } - -We produce: - -_test: ## @test -## BB#0: ## %entry - movslq %edi, %rax - imulq $1717986919, %rax, %rax ## imm = 0x66666667 - movq %rax, %rcx - shrq $63, %rcx -** shrq $32, %rax -** sarl $2, %eax - addl %ecx, %eax - ret - -The two starred instructions could be replaced with a "sarl $34, %rax". This -occurs in 186.crafty very frequently. - -//===---------------------------------------------------------------------===// diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll index 545662fd0f..7ceb972f61 100644 --- a/test/CodeGen/X86/divide-by-constant.ll +++ b/test/CodeGen/X86/divide-by-constant.ll @@ -51,3 +51,12 @@ define i32 @test5(i32 %A) nounwind { ; CHECK: mull 4(%esp) } +define signext i16 @test6(i16 signext %x) nounwind { +entry: + %div = sdiv i16 %x, 10 + ret i16 %div +; CHECK: test6: +; CHECK: imull $26215, %eax, %eax +; CHECK: shrl $31, %ecx +; CHECK: sarl $18, %eax +} |