diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 14 |
-rw-r--r-- | lib/Target/X86/X86Instr64bit.td | 32 |
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 32 |
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 92 |
4 files changed, 145 insertions, 25 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index f546ed4467..4b1945efe4 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2053,13 +2053,15 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS) { } } - // Look for sign/zext/any-extended cases: + // Look for sign/zext/any-extended or truncate cases: if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND - || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND) && + || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND + || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND - || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND)) { + || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND + || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { SDValue LExtOp0 = LHSShiftAmt.getOperand(0); SDValue RExtOp0 = RHSShiftAmt.getOperand(0); if (RExtOp0.getOpcode() == ISD::SUB && @@ -2068,7 +2070,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS) { // (rotl x, y) // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> // (rotr x, (sub 32, y)) - if (ConstantSDNode *SUBC = cast<ConstantSDNode>(RExtOp0.getOperand(0))) { + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) { if (SUBC->getAPIntValue() == OpSizeInBits) { return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, VT, LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); @@ -2080,7 +2083,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS) { // (rotr x, y) // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> // (rotl x, (sub 32, y)) - if (ConstantSDNode *SUBC = cast<ConstantSDNode>(LExtOp0.getOperand(0))) { + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) { if (SUBC->getAPIntValue() == OpSizeInBits) { return DAG.getNode(HasROTR ? 
ISD::ROTR : ISD::ROTL, VT, LHSShiftArg, HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 026c359bf9..5085f54385 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -1397,6 +1397,22 @@ def : Pat<(store (or (srl (loadi64 addr:$dst), CL:$amt), (shl GR64:$src2, (sub 64, CL:$amt))), addr:$dst), (SHRD64mrCL addr:$dst, GR64:$src2)>; +def : Pat<(or (srl GR64:$src1, (i8 (trunc RCX:$amt))), + (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))), + (SHRD64rrCL GR64:$src1, GR64:$src2)>; + +def : Pat<(store (or (srl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))), + (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))), + addr:$dst), + (SHRD64mrCL addr:$dst, GR64:$src2)>; + +def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), + (SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; + +def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1), + GR64:$src2, (i8 imm:$amt2)), addr:$dst), + (SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; + // (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) def : Pat<(or (shl GR64:$src1, CL:$amt), (srl GR64:$src2, (sub 64, CL:$amt))), @@ -1406,6 +1422,22 @@ def : Pat<(store (or (shl (loadi64 addr:$dst), CL:$amt), (srl GR64:$src2, (sub 64, CL:$amt))), addr:$dst), (SHLD64mrCL addr:$dst, GR64:$src2)>; +def : Pat<(or (shl GR64:$src1, (i8 (trunc RCX:$amt))), + (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))), + (SHLD64rrCL GR64:$src1, GR64:$src2)>; + +def : Pat<(store (or (shl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))), + (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))), + addr:$dst), + (SHLD64mrCL addr:$dst, GR64:$src2)>; + +def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), + (SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; + +def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1), + GR64:$src2, (i8 imm:$amt2)), addr:$dst), + (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; + 
// X86 specific add which produces a flag. def : Pat<(addc GR64:$src1, GR64:$src2), (ADD64rr GR64:$src1, GR64:$src2)>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index e105b0f3cd..b19c8b9031 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1248,26 +1248,14 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break; } unsigned Amt = MI->getOperand(3).getImm(); - unsigned A = MI->getOperand(0).getReg(); - unsigned B = MI->getOperand(1).getReg(); - unsigned C = MI->getOperand(2).getReg(); - bool AisDead = MI->getOperand(0).isDead(); - bool BisKill = MI->getOperand(1).isKill(); - bool CisKill = MI->getOperand(2).isKill(); - // If machine instrs are no longer in two-address forms, update - // destination register as well. - if (A == B) { - // Must be two address instruction! - assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) && - "Expecting a two-address instruction!"); - A = C; - CisKill = false; + if (NewMI) { + MachineFunction &MF = *MI->getParent()->getParent(); + MI = MF.CloneMachineInstr(MI); + NewMI = false; } - MachineFunction &MF = *MI->getParent()->getParent(); - return BuildMI(MF, get(Opc)) - .addReg(A, true, false, false, AisDead) - .addReg(C, false, false, CisKill) - .addReg(B, false, false, BisKill).addImm(Size-Amt); + MI->setDesc(get(Opc)); + MI->getOperand(3).setImm(Size-Amt); + return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); } case X86::CMOVB16rr: case X86::CMOVB32rr: @@ -1357,7 +1345,11 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break; case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break; } - + if (NewMI) { + MachineFunction &MF = *MI->getParent()->getParent(); + MI = MF.CloneMachineInstr(MI); + NewMI = false; + } MI->setDesc(get(Opc)); // Fallthrough intended. 
} diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 7a78e039b1..16dc366c16 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -325,6 +325,34 @@ def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{ return N->hasOneUse(); }]>; +// 'shld' and 'shrd' instruction patterns. Note that even though these have +// the srl and shl in their patterns, the C++ code must still check for them, +// because predicates are tested before children nodes are explored. + +def shrd : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2), + (or (srl node:$src1, node:$amt1), + (shl node:$src2, node:$amt2)), [{ + assert(N->getOpcode() == ISD::OR); + return N->getOperand(0).getOpcode() == ISD::SRL && + N->getOperand(1).getOpcode() == ISD::SHL && + isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) && + isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) && + N->getOperand(0).getConstantOperandVal(1) == + N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1); +}]>; + +def shld : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2), + (or (shl node:$src1, node:$amt1), + (srl node:$src2, node:$amt2)), [{ + assert(N->getOpcode() == ISD::OR); + return N->getOperand(0).getOpcode() == ISD::SHL && + N->getOperand(1).getOpcode() == ISD::SRL && + isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) && + isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) && + N->getOperand(0).getConstantOperandVal(1) == + N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1); +}]>; + //===----------------------------------------------------------------------===// // Instruction list... 
// @@ -2973,6 +3001,22 @@ def : Pat<(store (or (srl (loadi32 addr:$dst), CL:$amt), (shl GR32:$src2, (sub 32, CL:$amt))), addr:$dst), (SHRD32mrCL addr:$dst, GR32:$src2)>; +def : Pat<(or (srl GR32:$src1, (i8 (trunc ECX:$amt))), + (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), + (SHRD32rrCL GR32:$src1, GR32:$src2)>; + +def : Pat<(store (or (srl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))), + (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), + addr:$dst), + (SHRD32mrCL addr:$dst, GR32:$src2)>; + +def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)), + (SHRD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>; + +def : Pat<(store (shrd (loadi32 addr:$dst), (i8 imm:$amt1), + GR32:$src2, (i8 imm:$amt2)), addr:$dst), + (SHRD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>; + // (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c) def : Pat<(or (shl GR32:$src1, CL:$amt), (srl GR32:$src2, (sub 32, CL:$amt))), @@ -2982,6 +3026,22 @@ def : Pat<(store (or (shl (loadi32 addr:$dst), CL:$amt), (srl GR32:$src2, (sub 32, CL:$amt))), addr:$dst), (SHLD32mrCL addr:$dst, GR32:$src2)>; +def : Pat<(or (shl GR32:$src1, (i8 (trunc ECX:$amt))), + (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), + (SHLD32rrCL GR32:$src1, GR32:$src2)>; + +def : Pat<(store (or (shl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))), + (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), + addr:$dst), + (SHLD32mrCL addr:$dst, GR32:$src2)>; + +def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)), + (SHLD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>; + +def : Pat<(store (shld (loadi32 addr:$dst), (i8 imm:$amt1), + GR32:$src2, (i8 imm:$amt2)), addr:$dst), + (SHLD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>; + // (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c) def : Pat<(or (srl GR16:$src1, CL:$amt), (shl GR16:$src2, (sub 16, CL:$amt))), @@ -2991,6 +3051,22 @@ def : Pat<(store (or (srl (loadi16 addr:$dst), CL:$amt), (shl GR16:$src2, (sub 16, CL:$amt))), addr:$dst), 
(SHRD16mrCL addr:$dst, GR16:$src2)>; +def : Pat<(or (srl GR16:$src1, (i8 (trunc CX:$amt))), + (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), + (SHRD16rrCL GR16:$src1, GR16:$src2)>; + +def : Pat<(store (or (srl (loadi16 addr:$dst), (i8 (trunc CX:$amt))), + (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), + addr:$dst), + (SHRD16mrCL addr:$dst, GR16:$src2)>; + +def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)), + (SHRD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>; + +def : Pat<(store (shrd (loadi16 addr:$dst), (i8 imm:$amt1), + GR16:$src2, (i8 imm:$amt2)), addr:$dst), + (SHRD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; + // (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c) def : Pat<(or (shl GR16:$src1, CL:$amt), (srl GR16:$src2, (sub 16, CL:$amt))), @@ -3000,6 +3076,22 @@ def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt), (srl GR16:$src2, (sub 16, CL:$amt))), addr:$dst), (SHLD16mrCL addr:$dst, GR16:$src2)>; +def : Pat<(or (shl GR16:$src1, (i8 (trunc CX:$amt))), + (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), + (SHLD16rrCL GR16:$src1, GR16:$src2)>; + +def : Pat<(store (or (shl (loadi16 addr:$dst), (i8 (trunc CX:$amt))), + (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), + addr:$dst), + (SHLD16mrCL addr:$dst, GR16:$src2)>; + +def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)), + (SHLD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>; + +def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1), + GR16:$src2, (i8 imm:$amt2)), addr:$dst), + (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; + //===----------------------------------------------------------------------===// // Floating Point Stack Support //===----------------------------------------------------------------------===// |