diff options
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 15 | ||||
-rw-r--r-- | lib/Target/X86/X86Instr64bit.td | 15 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 128 |
3 files changed, 11 insertions, 147 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 58f1d88553..c38b678376 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9595,9 +9595,13 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, } static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { + if (!DCI.isBeforeLegalize()) + return SDValue(); + EVT VT = N->getValueType(0); - if (VT != MVT::i64 || !Subtarget->is64Bit()) + if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64) return SDValue(); // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) @@ -9607,6 +9611,8 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, std::swap(N0, N1); if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) return SDValue(); + if (!N0.hasOneUse() || !N1.hasOneUse()) + return SDValue(); SDValue ShAmt0 = N0.getOperand(1); if (ShAmt0.getValueType() != MVT::i8) @@ -9629,10 +9635,11 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, std::swap(ShAmt0, ShAmt1); } + unsigned Bits = VT.getSizeInBits(); if (ShAmt1.getOpcode() == ISD::SUB) { SDValue Sum = ShAmt1.getOperand(0); if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) { - if (SumC->getSExtValue() == 64 && + if (SumC->getSExtValue() == Bits && ShAmt1.getOperand(1) == ShAmt0) return DAG.getNode(Opc, DL, VT, Op0, Op1, @@ -9642,7 +9649,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) { ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0); if (ShAmt0C && - ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == 64) + ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == Bits) return DAG.getNode(Opc, DL, VT, N0.getOperand(0), N1.getOperand(0), DAG.getNode(ISD::TRUNCATE, DL, @@ -9921,7 +9928,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SHL: case ISD::SRA: case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget); - case ISD::OR: return PerformOrCombine(N, DAG, Subtarget); + case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget); case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); case X86ISD::FXOR: case X86ISD::FOR: return PerformFORCombine(N, DAG); diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index eef2ca0789..7b1cdbcf4a 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -2156,21 +2156,6 @@ def : Pat<(sra GR64:$src1, (and CL, 63)), def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SAR64mCL addr:$dst)>; -// Double shift patterns -def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)), - (SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1), - GR64:$src2, (i8 imm)), addr:$dst), - (SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; - -def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)), - (SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1), - GR64:$src2, (i8 imm)), addr:$dst), - (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; - // (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. let AddedComplexity = 5 in { // Try this before the selecting to OR def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2), diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 245a20e8f0..751bbb4ffc 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -489,34 +489,6 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ return (~KnownZero0 & ~KnownZero1) == 0; }]>; -// 'shld' and 'shrd' instruction patterns. Note that even though these have -// the srl and shl in their patterns, the C++ code must still check for them, -// because predicates are tested before children nodes are explored. - -def shrd : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2), - (or (srl node:$src1, node:$amt1), - (shl node:$src2, node:$amt2)), [{ - assert(N->getOpcode() == ISD::OR); - return N->getOperand(0).getOpcode() == ISD::SRL && - N->getOperand(1).getOpcode() == ISD::SHL && - isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) && - isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) && - N->getOperand(0).getConstantOperandVal(1) == - N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1); -}]>; - -def shld : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2), - (or (shl node:$src1, node:$amt1), - (srl node:$src2, node:$amt2)), [{ - assert(N->getOpcode() == ISD::OR); - return N->getOperand(0).getOpcode() == ISD::SHL && - N->getOperand(1).getOpcode() == ISD::SRL && - isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) && - isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) && - N->getOperand(0).getConstantOperandVal(1) == - N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1); -}]>; - //===----------------------------------------------------------------------===// // Instruction list... // @@ -4620,106 +4592,6 @@ def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst), def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst), (SAR32mCL addr:$dst)>; -// (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c) -def : Pat<(or (srl GR32:$src1, CL:$amt), - (shl GR32:$src2, (sub 32, CL:$amt))), - (SHRD32rrCL GR32:$src1, GR32:$src2)>; - -def : Pat<(store (or (srl (loadi32 addr:$dst), CL:$amt), - (shl GR32:$src2, (sub 32, CL:$amt))), addr:$dst), - (SHRD32mrCL addr:$dst, GR32:$src2)>; - -def : Pat<(or (srl GR32:$src1, (i8 (trunc ECX:$amt))), - (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), - (SHRD32rrCL GR32:$src1, GR32:$src2)>; - -def : Pat<(store (or (srl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))), - (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), - addr:$dst), - (SHRD32mrCL addr:$dst, GR32:$src2)>; - -def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)), - (SHRD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shrd (loadi32 addr:$dst), (i8 imm:$amt1), - GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst), - (SHRD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>; - -// (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c) -def : Pat<(or (shl GR32:$src1, CL:$amt), - (srl GR32:$src2, (sub 32, CL:$amt))), - (SHLD32rrCL GR32:$src1, GR32:$src2)>; - -def : Pat<(store (or (shl (loadi32 addr:$dst), CL:$amt), - (srl GR32:$src2, (sub 32, CL:$amt))), addr:$dst), - (SHLD32mrCL addr:$dst, GR32:$src2)>; - -def : Pat<(or (shl GR32:$src1, (i8 (trunc ECX:$amt))), - (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), - (SHLD32rrCL GR32:$src1, GR32:$src2)>; - -def : Pat<(store (or (shl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))), - (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), - addr:$dst), - (SHLD32mrCL addr:$dst, GR32:$src2)>; - -def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)), - (SHLD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shld (loadi32 addr:$dst), (i8 imm:$amt1), - GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst), - (SHLD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>; - -// (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c) -def : Pat<(or (srl GR16:$src1, CL:$amt), - (shl GR16:$src2, (sub 16, CL:$amt))), - (SHRD16rrCL GR16:$src1, GR16:$src2)>; - -def : Pat<(store (or (srl (loadi16 addr:$dst), CL:$amt), - (shl GR16:$src2, (sub 16, CL:$amt))), addr:$dst), - (SHRD16mrCL addr:$dst, GR16:$src2)>; - -def : Pat<(or (srl GR16:$src1, (i8 (trunc CX:$amt))), - (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), - (SHRD16rrCL GR16:$src1, GR16:$src2)>; - -def : Pat<(store (or (srl (loadi16 addr:$dst), (i8 (trunc CX:$amt))), - (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), - addr:$dst), - (SHRD16mrCL addr:$dst, GR16:$src2)>; - -def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)), - (SHRD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shrd (loadi16 addr:$dst), (i8 imm:$amt1), - GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst), - (SHRD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; - -// (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c) -def : Pat<(or (shl GR16:$src1, CL:$amt), - (srl GR16:$src2, (sub 16, CL:$amt))), - (SHLD16rrCL GR16:$src1, GR16:$src2)>; - -def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt), - (srl GR16:$src2, (sub 16, CL:$amt))), addr:$dst), - (SHLD16mrCL addr:$dst, GR16:$src2)>; - -def : Pat<(or (shl GR16:$src1, (i8 (trunc CX:$amt))), - (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), - (SHLD16rrCL GR16:$src1, GR16:$src2)>; - -def : Pat<(store (or (shl (loadi16 addr:$dst), (i8 (trunc CX:$amt))), - (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), - addr:$dst), - (SHLD16mrCL addr:$dst, GR16:$src2)>; - -def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)), - (SHLD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1), - GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst), - (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; - // (anyext (setcc_carry)) -> (setcc_carry) def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C16r)>; |