diff options
author | Dan Gohman <gohman@apple.com> | 2008-10-21 03:29:32 +0000 |
---|---|---|
committer | Dan Gohman <gohman@apple.com> | 2008-10-21 03:29:32 +0000 |
commit | 279c22e6da2612f024b70e5509ffb0cad32f38b2 (patch) | |
tree | 6499655e356d56dc612145ffb3a4f9f0d9694c41 /lib/Target/X86/X86ISelLowering.cpp | |
parent | 3afda6e9d1a74456b9baa87ee6aabbc06e356433 (diff) |
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replaces them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows how to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@57873 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 67 |
1 files changed, 66 insertions, 1 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 05db7cb886..ebf9eecf20 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5100,6 +5100,71 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { Cond = Cmp; addTest = false; } + // Also, recognize the pattern generated by an FCMP_UNE. We can emit + // two branches instead of an explicit OR instruction with a + // separate test. + } else if (Cond.getOpcode() == ISD::OR && + Cond.hasOneUse() && + Cond.getOperand(0).getOpcode() == X86ISD::SETCC && + Cond.getOperand(0).hasOneUse() && + Cond.getOperand(1).getOpcode() == X86ISD::SETCC && + Cond.getOperand(1).hasOneUse()) { + SDValue Cmp = Cond.getOperand(0).getOperand(1); + unsigned Opc = Cmp.getOpcode(); + if (Cmp == Cond.getOperand(1).getOperand(1) && + (Opc == X86ISD::CMP || + Opc == X86ISD::COMI || + Opc == X86ISD::UCOMI)) { + CC = Cond.getOperand(0).getOperand(0); + Chain = DAG.getNode(X86ISD::BRCOND, Op.getValueType(), + Chain, Dest, CC, Cmp); + CC = Cond.getOperand(1).getOperand(0); + Cond = Cmp; + addTest = false; + } + // Also, recognize the pattern generated by an FCMP_OEQ. We can emit + // two branches instead of an explicit AND instruction with a + // separate test. However, we only do this if this block doesn't + // have a fall-through edge, because this requires an explicit + // jmp when the condition is false. 
+ } else if (Cond.getOpcode() == ISD::AND && + Cond.hasOneUse() && + Cond.getOperand(0).getOpcode() == X86ISD::SETCC && + Cond.getOperand(0).hasOneUse() && + Cond.getOperand(1).getOpcode() == X86ISD::SETCC && + Cond.getOperand(1).hasOneUse()) { + SDValue Cmp = Cond.getOperand(0).getOperand(1); + unsigned Opc = Cmp.getOpcode(); + if (Cmp == Cond.getOperand(1).getOperand(1) && + (Opc == X86ISD::CMP || + Opc == X86ISD::COMI || + Opc == X86ISD::UCOMI) && + Op.getNode()->hasOneUse()) { + X86::CondCode CCode = + (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); + CCode = X86::GetOppositeBranchCondition(CCode); + CC = DAG.getConstant(CCode, MVT::i8); + SDValue User = SDValue(*Op.getNode()->use_begin(), 0); + // Look for an unconditional branch following this conditional branch. + // We need this because we need to reverse the successors in order + // to implement FCMP_OEQ. + if (User.getOpcode() == ISD::BR) { + SDValue FalseBB = User.getOperand(1); + SDValue NewBR = + DAG.UpdateNodeOperands(User, User.getOperand(0), Dest); + assert(NewBR == User); + Dest = FalseBB; + + Chain = DAG.getNode(X86ISD::BRCOND, Op.getValueType(), + Chain, Dest, CC, Cmp); + X86::CondCode CCode = + (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0); + CCode = X86::GetOppositeBranchCondition(CCode); + CC = DAG.getConstant(CCode, MVT::i8); + Cond = Cmp; + addTest = false; + } + } } if (addTest) { @@ -5107,7 +5172,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8)); } return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), - Chain, Op.getOperand(2), CC, Cond); + Chain, Dest, CC, Cond); } |