diff options
| field | value | date |
|---|---|---|
| author | Anton Korobeynikov <asl@math.spbu.ru> | 2009-07-16 14:14:33 +0000 |
| committer | Anton Korobeynikov <asl@math.spbu.ru> | 2009-07-16 14:14:33 +0000 |
| commit | 0a42d2b4376526dbef25834b29a39fa684f9a902 (patch) | |
| tree | c9c5317374d4b023274b75988216493ed967de0c | |
| parent | d20af96f5b1c528af2dad59ac0c9cc4f2a968d2d (diff) | |
Properly handle divides. As a bonus, implement memory-operand versions of them.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@76003 91177308-0d34-0410-b5e6-96231b3b80d8
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 206 |
| -rw-r--r-- | lib/Target/SystemZ/SystemZISelLowering.cpp | 11 |
| -rw-r--r-- | lib/Target/SystemZ/SystemZInstrInfo.td | 86 |
| -rw-r--r-- | lib/Target/SystemZ/SystemZRegisterInfo.td | 4 |
| -rw-r--r-- | test/CodeGen/SystemZ/08-DivRem.ll | 8 |
| -rw-r--r-- | test/CodeGen/SystemZ/08-DivRemMemOp.ll | 64 |
6 files changed, 313 insertions, 66 deletions
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index e009eec48b..3df814e1bb 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -30,6 +30,10 @@ #include "llvm/Support/Debug.h" using namespace llvm; +static const unsigned subreg_32bit = 1; +static const unsigned subreg_even = 1; +static const unsigned subreg_odd = 2; + namespace { /// SystemZRRIAddressMode - This corresponds to rriaddr, but uses SDValue's /// instead of register numbers for the leaves of the matched tree. @@ -129,6 +133,10 @@ namespace { SDValue &Base, SDValue &Disp, SDValue &Index); SDNode *Select(SDValue Op); + + bool TryFoldLoad(SDValue P, SDValue N, + SDValue &Base, SDValue &Disp, SDValue &Index); + bool MatchAddress(SDValue N, SystemZRRIAddressMode &AM, bool is12Bit, unsigned Depth = 0); bool MatchAddressBase(SDValue N, SystemZRRIAddressMode &AM); @@ -573,6 +581,15 @@ bool SystemZDAGToDAGISel::SelectLAAddr(SDValue Op, SDValue Addr, return false; } +bool SystemZDAGToDAGISel::TryFoldLoad(SDValue P, SDValue N, + SDValue &Base, SDValue &Disp, SDValue &Index) { + if (ISD::isNON_EXTLoad(N.getNode()) && + N.hasOneUse() && + IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode())) + return SelectAddrRRI20(P, N.getOperand(1), Base, Disp, Index); + return false; +} + /// InstructionSelect - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. 
void SystemZDAGToDAGISel::InstructionSelect() { @@ -593,7 +610,9 @@ void SystemZDAGToDAGISel::InstructionSelect() { SDNode *SystemZDAGToDAGISel::Select(SDValue Op) { SDNode *Node = Op.getNode(); + MVT NVT = Node->getValueType(0); DebugLoc dl = Op.getDebugLoc(); + unsigned Opcode = Node->getOpcode(); // Dump information about the Node being selected #ifndef NDEBUG @@ -611,8 +630,195 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) { DOUT << "\n"; Indent -= 2; #endif + return NULL; // Already selected. + } + + switch (Opcode) { + default: break; + case ISD::SDIVREM: { + unsigned Opc, MOpc, ClrOpc = 0; + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + + MVT ResVT; + switch (NVT.getSimpleVT()) { + default: assert(0 && "Unsupported VT!"); + case MVT::i32: + Opc = SystemZ::SDIVREM32r; MOpc = SystemZ::SDIVREM32m; + ClrOpc = SystemZ::MOV32ri16; + ResVT = MVT::v2i32; + break; + case MVT::i64: + Opc = SystemZ::SDIVREM64r; MOpc = SystemZ::SDIVREM64m; + ResVT = MVT::v2i64; + break; + } + + SDValue Tmp0, Tmp1, Tmp2; + bool foldedLoad = TryFoldLoad(Op, N1, Tmp0, Tmp1, Tmp2); + + // Prepare the dividend + SDNode *Dividend = N0.getNode(); + + // Insert prepared dividend into suitable 'subreg' + SDNode *Tmp = CurDAG->getTargetNode(TargetInstrInfo::IMPLICIT_DEF, + dl, ResVT); + Dividend = + CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG, dl, ResVT, + SDValue(Tmp, 0), SDValue(Dividend, 0), + CurDAG->getTargetConstant(subreg_odd, MVT::i32)); + + // Zero out even subreg, if needed + if (ClrOpc) { + SDNode * ZeroHi = CurDAG->getTargetNode(SystemZ::MOV32ri16, dl, NVT, + CurDAG->getTargetConstant(0, MVT::i32)); + Dividend = + CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG, dl, ResVT, + SDValue(Dividend, 0), + SDValue(ZeroHi, 0), + CurDAG->getTargetConstant(subreg_even, MVT::i32)); + } + + SDNode *Result; + SDValue DivVal = SDValue(Dividend, 0); + if (foldedLoad) { + SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) }; + Result = 
CurDAG->getTargetNode(MOpc, dl, ResVT, Ops, array_lengthof(Ops)); + // Update the chain. + ReplaceUses(N1.getValue(1), SDValue(Result, 0)); + } else { + Result = CurDAG->getTargetNode(Opc, dl, ResVT, SDValue(Dividend, 0), N1); + } + + // Copy the division (odd subreg) result, if it is needed. + if (!Op.getValue(0).use_empty()) { + SDNode *Div = CurDAG->getTargetNode(TargetInstrInfo::EXTRACT_SUBREG, + dl, NVT, + SDValue(Result, 0), + CurDAG->getTargetConstant(subreg_odd, + MVT::i32)); + ReplaceUses(Op.getValue(0), SDValue(Div, 0)); + #ifndef NDEBUG + DOUT << std::string(Indent-2, ' ') << "=> "; + DEBUG(Result->dump(CurDAG)); + DOUT << "\n"; + #endif + } + + // Copy the remainder (even subreg) result, if it is needed. + if (!Op.getValue(1).use_empty()) { + SDNode *Rem = CurDAG->getTargetNode(TargetInstrInfo::EXTRACT_SUBREG, + dl, NVT, + SDValue(Result, 0), + CurDAG->getTargetConstant(subreg_even, + MVT::i32)); + ReplaceUses(Op.getValue(1), SDValue(Rem, 0)); + #ifndef NDEBUG + DOUT << std::string(Indent-2, ' ') << "=> "; + DEBUG(Result->dump(CurDAG)); + DOUT << "\n"; + #endif + } + +#ifndef NDEBUG + Indent -= 2; +#endif + return NULL; } + case ISD::UDIVREM: { + unsigned Opc, MOpc, ClrOpc; + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + MVT ResVT; + + switch (NVT.getSimpleVT()) { + default: assert(0 && "Unsupported VT!"); + case MVT::i32: + Opc = SystemZ::UDIVREM32r; MOpc = SystemZ::UDIVREM32m; + ClrOpc = SystemZ::MOV32ri16; + ResVT = MVT::v2i32; + break; + case MVT::i64: + Opc = SystemZ::UDIVREM64r; MOpc = SystemZ::UDIVREM64m; + ClrOpc = SystemZ::MOV64ri16; + ResVT = MVT::v2i64; + break; + } + + SDValue Tmp0, Tmp1, Tmp2; + bool foldedLoad = TryFoldLoad(Op, N1, Tmp0, Tmp1, Tmp2); + + // Prepare the dividend + SDNode *Dividend = N0.getNode(); + + // Insert prepared dividend into suitable 'subreg' + SDNode *Tmp = CurDAG->getTargetNode(TargetInstrInfo::IMPLICIT_DEF, + dl, ResVT); + Dividend = + 
CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG, dl, ResVT, + SDValue(Tmp, 0), SDValue(Dividend, 0), + CurDAG->getTargetConstant(subreg_odd, MVT::i32)); + + // Zero out even subreg, if needed + SDNode * ZeroHi = CurDAG->getTargetNode(ClrOpc, dl, NVT, + CurDAG->getTargetConstant(0, + MVT::i32)); + Dividend = + CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG, dl, ResVT, + SDValue(Dividend, 0), + SDValue(ZeroHi, 0), + CurDAG->getTargetConstant(subreg_even, MVT::i32)); + + SDValue DivVal = SDValue(Dividend, 0); + SDNode *Result; + if (foldedLoad) { + SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) }; + Result = CurDAG->getTargetNode(MOpc, dl,ResVT, + Ops, array_lengthof(Ops)); + // Update the chain. + ReplaceUses(N1.getValue(1), SDValue(Result, 0)); + } else { + Result = CurDAG->getTargetNode(Opc, dl, ResVT, DivVal, N1); + } + + // Copy the division (odd subreg) result, if it is needed. + if (!Op.getValue(0).use_empty()) { + SDNode *Div = CurDAG->getTargetNode(TargetInstrInfo::EXTRACT_SUBREG, + dl, NVT, + SDValue(Result, 0), + CurDAG->getTargetConstant(subreg_odd, + MVT::i32)); + ReplaceUses(Op.getValue(0), SDValue(Div, 0)); + #ifndef NDEBUG + DOUT << std::string(Indent-2, ' ') << "=> "; + DEBUG(Result->dump(CurDAG)); + DOUT << "\n"; + #endif + } + + // Copy the remainder (even subreg) result, if it is needed. 
+ if (!Op.getValue(1).use_empty()) { + SDNode *Rem = CurDAG->getTargetNode(TargetInstrInfo::EXTRACT_SUBREG, + dl, NVT, + SDValue(Result, 0), + CurDAG->getTargetConstant(subreg_even, + MVT::i32)); + ReplaceUses(Op.getValue(1), SDValue(Rem, 0)); + #ifndef NDEBUG + DOUT << std::string(Indent-2, ' ') << "=> "; + DEBUG(Result->dump(CurDAG)); + DOUT << "\n"; + #endif + } + +#ifndef NDEBUG + Indent -= 2; +#endif + + return NULL; + } + } // Select the default instruction SDNode *ResNode = SelectCode(Op); diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 096fe420cc..952f4edb63 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -43,7 +43,9 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) : // Set up the register classes. addRegisterClass(MVT::i32, SystemZ::GR32RegisterClass); addRegisterClass(MVT::i64, SystemZ::GR64RegisterClass); + addRegisterClass(MVT::v2i32,SystemZ::GR64PRegisterClass); addRegisterClass(MVT::i128, SystemZ::GR128RegisterClass); + addRegisterClass(MVT::v2i64,SystemZ::GR128RegisterClass); // Compute derived properties from the register classes computeRegisterProperties(); @@ -70,6 +72,15 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) : setOperationAction(ISD::JumpTable, MVT::i64, Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::SDIV, MVT::i64, Expand); + setOperationAction(ISD::UDIV, MVT::i64, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::UREM, MVT::i64, Expand); + // FIXME: Can we lower these 2 efficiently? 
setOperationAction(ISD::SETCC, MVT::i32, Expand); setOperationAction(ISD::SETCC, MVT::i64, Expand); diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 17bf19c6d9..5a34d95e67 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -580,22 +580,34 @@ def MULSX64rr32 : Pseudo<(outs GR64:$dst), (ins GR64:$src1, GR32:$src2), "msgfr\t{$dst, $src2}", [(set GR64:$dst, (mul GR64:$src1, (sext GR32:$src2)))]>; -def SDIVREM64rrP : Pseudo<(outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2), - "dr\t{$dst, $src2}", - []>; - -def SDIVREM128rrP : Pseudo<(outs GR128:$dst), (ins GR128:$src1, GR64:$src2), - "dsgr\t{$dst, $src2}", - []>; - -def UDIVREM64rrP : Pseudo<(outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2), - "dlr\t{$dst, $src2}", - []>; +def SDIVREM32r : Pseudo<(outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2), + "dr\t{$dst, $src2}", + []>; +def SDIVREM64r : Pseudo<(outs GR128:$dst), (ins GR128:$src1, GR64:$src2), + "dsgr\t{$dst, $src2}", + []>; -def UDIVREM128rrP : Pseudo<(outs GR128:$dst), (ins GR128:$src1, GR64:$src2), - "dlgr\t{$dst, $src2}", - []>; +def UDIVREM32r : Pseudo<(outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2), + "dlr\t{$dst, $src2}", + []>; +def UDIVREM64r : Pseudo<(outs GR128:$dst), (ins GR128:$src1, GR64:$src2), + "dlgr\t{$dst, $src2}", + []>; +let mayLoad = 1 in { +def SDIVREM32m : Pseudo<(outs GR64P:$dst), (ins GR64P:$src1, rriaddr:$src2), + "d\t{$dst, $src2}", + []>; +def SDIVREM64m : Pseudo<(outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2), + "dsg\t{$dst, $src2}", + []>; +def UDIVREM32m : Pseudo<(outs GR64P:$dst), (ins GR64P:$src1, rriaddr:$src2), + "dl\t{$dst, $src2}", + []>; +def UDIVREM64m : Pseudo<(outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2), + "dlg\t{$dst, $src2}", + []>; +} // mayLoad } // isTwoAddress = 1 //===----------------------------------------------------------------------===// @@ -790,51 +802,5 @@ def : Pat<(mulhu GR64:$src1, GR64:$src2), 
GR64:$src2), subreg_even)>; -// divs -// FIXME: Add memory versions -def : Pat<(sdiv GR32:$src1, GR32:$src2), - (EXTRACT_SUBREG (SDIVREM64rrP (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - GR32:$src1, subreg_odd), - GR32:$src2), - subreg_odd)>; -def : Pat<(sdiv GR64:$src1, GR64:$src2), - (EXTRACT_SUBREG (SDIVREM128rrP (INSERT_SUBREG (i128 (IMPLICIT_DEF)), - GR64:$src1, subreg_odd), - GR64:$src2), - subreg_odd)>; -def : Pat<(udiv GR32:$src1, GR32:$src2), - (EXTRACT_SUBREG (UDIVREM64rrP (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - GR32:$src1, subreg_odd), - GR32:$src2), - subreg_odd)>; -def : Pat<(udiv GR64:$src1, GR64:$src2), - (EXTRACT_SUBREG (UDIVREM128rrP (INSERT_SUBREG (i128 (IMPLICIT_DEF)), - GR64:$src1, subreg_odd), - GR64:$src2), - subreg_odd)>; - -// rems -// FIXME: Add memory versions -def : Pat<(srem GR32:$src1, GR32:$src2), - (EXTRACT_SUBREG (SDIVREM64rrP (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - GR32:$src1, subreg_odd), - GR32:$src2), - subreg_even)>; -def : Pat<(srem GR64:$src1, GR64:$src2), - (EXTRACT_SUBREG (SDIVREM128rrP (INSERT_SUBREG (i128 (IMPLICIT_DEF)), - GR64:$src1, subreg_odd), - GR64:$src2), - subreg_even)>; -def : Pat<(urem GR32:$src1, GR32:$src2), - (EXTRACT_SUBREG (UDIVREM64rrP (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - GR32:$src1, subreg_odd), - GR32:$src2), - subreg_even)>; -def : Pat<(urem GR64:$src1, GR64:$src2), - (EXTRACT_SUBREG (UDIVREM128rrP (INSERT_SUBREG (i128 (IMPLICIT_DEF)), - GR64:$src1, subreg_odd), - GR64:$src2), - subreg_even)>; - def : Pat<(i32 imm:$src), (EXTRACT_SUBREG (MOV64ri32 (i64 imm:$src)), subreg_32bit)>; diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td index d3e7159364..6fbb4cdb87 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -332,7 +332,7 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64, } // Even-odd register pairs -def GR64P : RegisterClass<"SystemZ", [i64], 64, +def GR64P : RegisterClass<"SystemZ", [v2i32], 64, [R0P, 
R2P, R4P, R6P, R8P, R10P, R12P, R14P]> { let SubRegClassList = [GR32, GR32]; @@ -368,7 +368,7 @@ def GR64P : RegisterClass<"SystemZ", [i64], 64, }]; } -def GR128 : RegisterClass<"SystemZ", [i128], 128, +def GR128 : RegisterClass<"SystemZ", [i128, v2i64], 128, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q]> { let SubRegClassList = [GR64, GR64]; diff --git a/test/CodeGen/SystemZ/08-DivRem.ll b/test/CodeGen/SystemZ/08-DivRem.ll index ea9214f812..45cbfea0e0 100644 --- a/test/CodeGen/SystemZ/08-DivRem.ll +++ b/test/CodeGen/SystemZ/08-DivRem.ll @@ -1,7 +1,7 @@ -; RUN: llvm-as < %s | llc | grep dsgr | count 2 -; RUN: llvm-as < %s | llc | grep dr | count 2 -; RUN: llvm-as < %s | llc | grep dlr | count 2 -; RUN: llvm-as < %s | llc | grep dlgr | count 2 +; RUN: llvm-as < %s | llc | grep dsgr | count 2 +; RUN: llvm-as < %s | llc | grep dr | count 2 +; RUN: llvm-as < %s | llc | grep dlr | count 2 +; RUN: llvm-as < %s | llc | grep dlgr | count 2 target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" target triple = "s390x-unknown-linux-gnu" diff --git a/test/CodeGen/SystemZ/08-DivRemMemOp.ll b/test/CodeGen/SystemZ/08-DivRemMemOp.ll new file mode 100644 index 0000000000..41eef36b2d --- /dev/null +++ b/test/CodeGen/SystemZ/08-DivRemMemOp.ll @@ -0,0 +1,64 @@ +; RUN: llvm-as < %s | llc | grep {d.%} | count 2 +; RUN: llvm-as < %s | llc | grep dsg | count 2 +; RUN: llvm-as < %s | llc | grep {dl.%} | count 2 +; RUN: llvm-as < %s | llc | grep dlg | count 2 + +target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" +target triple = "s390x-unknown-linux-gnu" + +define i64 @div(i64 %a, i64* %b) nounwind readnone { +entry: + %b1 = load i64* %b + %div = sdiv i64 %a, %b1 + ret i64 %div +} + +define i64 @div1(i64 %a, i64* %b) nounwind readnone { +entry: + %b1 = load i64* %b + %div = udiv i64 %a, %b1 + ret i64 %div +} + +define i64 @rem(i64 %a, i64* %b) nounwind readnone { +entry: + %b1 = load i64* 
%b + %div = srem i64 %a, %b1 + ret i64 %div +} + +define i64 @rem1(i64 %a, i64* %b) nounwind readnone { +entry: + %b1 = load i64* %b + %div = urem i64 %a, %b1 + ret i64 %div +} + +define i32 @div2(i32 %a, i32* %b) nounwind readnone { +entry: + %b1 = load i32* %b + %div = sdiv i32 %a, %b1 + ret i32 %div +} + +define i32 @div3(i32 %a, i32* %b) nounwind readnone { +entry: + %b1 = load i32* %b + %div = udiv i32 %a, %b1 + ret i32 %div +} + +define i32 @rem2(i32 %a, i32* %b) nounwind readnone { +entry: + %b1 = load i32* %b + %div = srem i32 %a, %b1 + ret i32 %div +} + +define i32 @rem3(i32 %a, i32* %b) nounwind readnone { +entry: + %b1 = load i32* %b + %div = urem i32 %a, %b1 + ret i32 %div +} + |