diff options
author | Michael J. Spencer <bigcheesegs@gmail.com> | 2012-02-24 19:01:22 +0000 |
---|---|---|
committer | Michael J. Spencer <bigcheesegs@gmail.com> | 2012-02-24 19:01:22 +0000 |
commit | 1a2d061ec08b86ba91d7009b6ffcf08d5bac3f42 (patch) | |
tree | cbec6cd246acee66e0949a47d315b3e3ea3169ba | |
parent | 27bc818eaf73efe169f95c4dd8f564fd051dd824 (diff) |
Add WIN_FTOL_* pseudo-instructions to model the unique calling convention
used by the Win32 _ftol2 runtime function. Patch by Joe Groff!
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@151382 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86FloatingPoint.cpp | 26 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 99 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 15 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrCompiler.td | 18 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 5 | ||||
-rw-r--r-- | test/CodeGen/X86/win_ftol2.ll | 130 |
6 files changed, 261 insertions, 32 deletions
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index e3461c82c7..5c00388541 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -1644,6 +1644,32 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { return; } + case X86::WIN_FTOL_32: + case X86::WIN_FTOL_64: { + MachineBasicBlock::iterator InsertPt = MI; + + // Push the operand into ST0. + MachineOperand &Op = MI->getOperand(0); + assert(Op.isUse() && Op.isReg() && + Op.getReg() >= X86::FP0 && Op.getReg() <= X86::FP6); + unsigned FPReg = getFPReg(Op); + if (Op.isKill()) + moveToTop(FPReg, I); + else + duplicateToTop(FPReg, FPReg, I); + + // Emit the call. This will pop the operand. + BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::CALLpcrel32)) + .addExternalSymbol("_ftol2") + .addReg(X86::ST0, RegState::ImplicitKill) + .addReg(X86::EAX, RegState::Define | RegState::Implicit) + .addReg(X86::EDX, RegState::Define | RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + --StackTop; + + break; + } + case X86::RET: case X86::RETI: // If RET has an FP register use operand, pass the first one in ST(0) and diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 8c645972e6..8cb8fbb369 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -187,15 +187,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setLibcallName(RTLIB::SREM_I64, "_allrem"); setLibcallName(RTLIB::UREM_I64, "_aullrem"); setLibcallName(RTLIB::MUL_I64, "_allmul"); - setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2"); - setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2"); setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall); 
setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall); - setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C); - setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C); + + // The _ftol2 runtime function has an unusual calling conv, which + // is modeled by a special pseudo-instruction. + setLibcallName(RTLIB::FPTOUINT_F64_I64, 0); + setLibcallName(RTLIB::FPTOUINT_F32_I64, 0); + setLibcallName(RTLIB::FPTOUINT_F64_I32, 0); + setLibcallName(RTLIB::FPTOUINT_F32_I32, 0); } if (Subtarget->isTargetDarwin()) { @@ -315,6 +318,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); } + if (isTargetFTOL()) { + // Use the _ftol2 runtime function, which has a pseudo-instruction + // to handle its weird calling convention. + setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom); + } + // TODO: when we have SSE, these could be more efficient, by using movd/movq. if (!X86ScalarSSEf64) { setOperationAction(ISD::BITCAST , MVT::f32 , Expand); @@ -7708,14 +7717,14 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const { EVT DstTy = Op.getValueType(); - if (!IsSigned) { + if (!IsSigned && !isIntegerTypeFTOL(DstTy)) { assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT"); DstTy = MVT::i64; } assert(DstTy.getSimpleVT() <= MVT::i64 && DstTy.getSimpleVT() >= MVT::i16 && - "Unknown FP_TO_SINT to lower!"); + "Unknown FP_TO_INT to lower!"); // These are really Legal. if (DstTy == MVT::i32 && @@ -7726,26 +7735,29 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const { isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) return std::make_pair(SDValue(), SDValue()); - // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary - // stack slot. + // We lower FP->int64 either into FISTP64 followed by a load from a temporary + // stack slot, or into the FTOL runtime function. 
MachineFunction &MF = DAG.getMachineFunction(); unsigned MemSize = DstTy.getSizeInBits()/8; int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); - - unsigned Opc; - switch (DstTy.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Invalid FP_TO_SINT to lower!"); - case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; - case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; - case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; - } + if (!IsSigned && isIntegerTypeFTOL(DstTy)) + Opc = X86ISD::WIN_FTOL; + else + switch (DstTy.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Invalid FP_TO_SINT to lower!"); + case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; + case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; + case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; + } SDValue Chain = DAG.getEntryNode(); SDValue Value = Op.getOperand(0); EVT TheVT = Op.getOperand(0).getValueType(); + // FIXME This causes a redundant load/store if the SSE-class value is already + // in memory, such as if it is on the callstack. 
if (isScalarFPTypeInSSEReg(TheVT)) { assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!"); Chain = DAG.getStore(Chain, DL, Value, StackSlot, @@ -7770,12 +7782,23 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const { MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), MachineMemOperand::MOStore, MemSize, MemSize); - // Build the FP_TO_INT*_IN_MEM - SDValue Ops[] = { Chain, Value, StackSlot }; - SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other), - Ops, 3, DstTy, MMO); - - return std::make_pair(FIST, StackSlot); + if (Opc != X86ISD::WIN_FTOL) { + // Build the FP_TO_INT*_IN_MEM + SDValue Ops[] = { Chain, Value, StackSlot }; + SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other), + Ops, 3, DstTy, MMO); + return std::make_pair(FIST, StackSlot); + } else { + SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL, + DAG.getVTList(MVT::Other, MVT::Glue), + Chain, Value); + SDValue eax = DAG.getCopyFromReg(ftol, DL, X86::EAX, + MVT::i32, ftol.getValue(1)); + SDValue edx = DAG.getCopyFromReg(eax.getValue(1), DL, X86::EDX, + MVT::i32, eax.getValue(2)); + SDValue pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, eax, edx); + return std::make_pair(pair, SDValue()); + } } SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, @@ -7788,10 +7811,14 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, // If FP_TO_INTHelper failed, the node is actually supposed to be Legal. if (FIST.getNode() == 0) return Op; - // Load the result. - return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, MachinePointerInfo(), - false, false, false, 0); + if (StackSlot.getNode()) + // Load the result. + return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), + FIST, StackSlot, MachinePointerInfo(), + false, false, false, 0); + else + // The node is the result. 
+ return FIST; } SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, @@ -10837,16 +10864,25 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, case ISD::SUBE: // We don't want to expand or promote these. return; - case ISD::FP_TO_SINT: { + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: { + bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT; + + if (!IsSigned && !isIntegerTypeFTOL(SDValue(N, 0).getValueType())) + return; + std::pair<SDValue,SDValue> Vals = - FP_TO_INTHelper(SDValue(N, 0), DAG, true); + FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned); SDValue FIST = Vals.first, StackSlot = Vals.second; if (FIST.getNode() != 0) { EVT VT = N->getValueType(0); // Return a load from the stack slot. - Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, - MachinePointerInfo(), - false, false, false, 0)); + if (StackSlot.getNode() != 0) + Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, + MachinePointerInfo(), + false, false, false, 0)); + else + Results.push_back(FIST); } return; } @@ -11060,6 +11096,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA"; case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER"; case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA"; + case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL"; } } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 00b17d3795..8b6cda5927 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -299,6 +299,9 @@ namespace llvm { // falls back to heap allocation if not. SEG_ALLOCA, + // WIN_FTOL - Windows's _ftol2 runtime routine to do fptoui. + WIN_FTOL, + // Memory barrier MEMBARRIER, MFENCE, @@ -611,6 +614,18 @@ namespace llvm { (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 } + /// isTargetFTOL - Return true if the target uses the MSVC _ftol2 routine + /// for fptoui. 
+ bool isTargetFTOL() const { + return Subtarget->isTargetWindows() && !Subtarget->is64Bit(); + } + + /// isIntegerTypeFTOL - Return true if the MSVC _ftol2 routine should be + /// used for fptoui to the given type. + bool isIntegerTypeFTOL(EVT VT) const { + return isTargetFTOL() && VT == MVT::i64; + } + /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const; diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index ce0a885083..ac49232912 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -125,10 +125,26 @@ def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size), [(set GR64:$dst, (X86SegAlloca GR64:$size))]>, Requires<[In64BitMode]>; - } +// The MSVC runtime contains an _ftol2 routine for converting floating-point +// to integer values. It has a strange calling convention: the input is +// popped from the x87 stack, and the return value is given in EDX:EAX. No +// other registers (aside from flags) are touched. +// Microsoft toolchains do not support 80-bit precision, so a WIN_FTOL_80 +// variant is unnecessary. 
+ +let Defs = [EAX, EDX, EFLAGS], FPForm = SpecialFP in { + def WIN_FTOL_32 : I<0, Pseudo, (outs), (ins RFP32:$src), + "# win32 fptoui", + [(X86WinFTOL RFP32:$src)]>, + Requires<[In32BitMode]>; + def WIN_FTOL_64 : I<0, Pseudo, (outs), (ins RFP64:$src), + "# win32 fptoui", + [(X86WinFTOL RFP64:$src)]>, + Requires<[In32BitMode]>; +} //===----------------------------------------------------------------------===// // EH Pseudo Instructions diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index e575cea804..24f5722d55 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -99,6 +99,8 @@ def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; +def SDT_X86WIN_FTOL : SDTypeProfile<0, 1, [SDTCisFP<0>]>; + def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>; @@ -238,6 +240,9 @@ def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA, def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def X86WinFTOL : SDNode<"X86ISD::WIN_FTOL", SDT_X86WIN_FTOL, + [SDNPHasChain, SDNPOutGlue]>; + //===----------------------------------------------------------------------===// // X86 Operand Definitions. 
// diff --git a/test/CodeGen/X86/win_ftol2.ll b/test/CodeGen/X86/win_ftol2.ll new file mode 100644 index 0000000000..90d8a312eb --- /dev/null +++ b/test/CodeGen/X86/win_ftol2.ll @@ -0,0 +1,130 @@ +; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL +; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT +; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=COMPILERRT +; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=COMPILERRT +; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT +; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=COMPILERRT +; RUN: llc < %s -mattr=-sse -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL_2 + +; Win32 targets use the MSVCRT _ftol2 runtime function for fptoui to i64. This +; function has a nonstandard calling convention: the input value is expected on +; the x87 stack instead of the callstack. The input value is popped by the +; callee. Mingw32 uses normal cdecl compiler-rt functions. 
+ +define i64 @double_ui64(double %x) nounwind { +entry: +; COMPILERRT: @double_ui64 +; COMPILERRT-NOT: calll __ftol2 +; FTOL: @double_ui64 +; FTOL: fldl +; FTOL: calll __ftol2 +; FTOL-NOT: fstp + %0 = fptoui double %x to i64 + ret i64 %0 +} + +define i64 @float_ui64(float %x) nounwind { +entry: +; COMPILERRT: @float_ui64 +; COMPILERRT-NOT: calll __ftol2 +; FTOL: @float_ui64 +; FTOL: flds +; FTOL: calll __ftol2 +; FTOL-NOT: fstp + %0 = fptoui float %x to i64 + ret i64 %0 +} + +define i64 @double_ui64_2(double %x, double %y, double %z) nounwind { +; COMPILERRT: @double_ui64_2 +; FTOL: @double_ui64_2 +; FTOL_2: @double_ui64_2 +;; stack is empty +; FTOL_2: fldl +;; stack is %z +; FTOL_2: fldl +;; stack is %y %z +; FTOL_2: fldl +;; stack is %x %y %z +; FTOL_2: fdiv %st(0), %st(1) +;; stack is %x %1 %z +; FTOL_2: fsubp %st(2) +;; stack is %1 %2 +; FTOL_2: fxch +; FTOL_2-NOT: fld +; FTOL_2-NOT: fst +;; stack is %2 %1 +; FTOL_2: calll __ftol2 +; FTOL_2-NOT: fxch +; FTOL_2-NOT: fld +; FTOL_2-NOT: fst +; FTOL_2: calll __ftol2 +;; stack is empty + + %1 = fdiv double %x, %y + %2 = fsub double %x, %z + %3 = fptoui double %1 to i64 + %4 = fptoui double %2 to i64 + %5 = sub i64 %3, %4 + ret i64 %5 +} + +define i64 @double_ui64_3(double %x, double %y, double %z) nounwind { +; COMPILERRT: @double_ui64_3 +; FTOL: @double_ui64_3 +; FTOL_2: @double_ui64_3 +;; stack is empty +; FTOL_2: fldl +;; stack is %z +; FTOL_2: fldl +;; stack is %y %z +; FTOL_2: fldl +;; stack is %x %y %z +; FTOL_2: fdiv %st(0), %st(1) +;; stack is %x %1 %z +; FTOL_2: fsubp %st(2) +;; stack is %1 %2 +; FTOL_2-NOT: fxch +; FTOL_2-NOT: fld +; FTOL_2-NOT: fst +;; stack is %1 %2 (still) +; FTOL_2: calll __ftol2 +; FTOL_2-NOT: fxch +; FTOL_2-NOT: fld +; FTOL_2-NOT: fst +; FTOL_2: calll __ftol2 +;; stack is empty + + %1 = fdiv double %x, %y + %2 = fsub double %x, %z + %3 = fptoui double %1 to i64 + %4 = fptoui double %2 to i64 + %5 = sub i64 %4, %3 + ret i64 %5 +} + +define {double, i64} @double_ui64_4(double %x, 
double %y) nounwind { +; COMPILERRT: @double_ui64_4 +; FTOL: @double_ui64_4 +; FTOL_2: @double_ui64_4 +;; stack is empty +; FTOL_2: fldl +;; stack is %y +; FTOL_2: fldl +;; stack is %x %y +; FTOL_2: fxch +;; stack is %y %x +; FTOL_2: calll __ftol2 +;; stack is %x +; FTOL_2: fld %st(0) +;; stack is %x %x +; FTOL_2: calll __ftol2 +;; stack is %x + + %1 = fptoui double %x to i64 + %2 = fptoui double %y to i64 + %3 = sub i64 %1, %2 + %4 = insertvalue {double, i64} undef, double %x, 0 + %5 = insertvalue {double, i64} %4, i64 %3, 1 + ret {double, i64} %5 +} |