Implement FRINT lowering on PPC using frin

Like nearbyint, rint can be implemented on PPC using the frin instruction. The complication comes from the fact that rint needs to set the FE_INEXACT flag when the result does not equal the input value (and frin does not do that). As a result, we use a custom inserter which, after the rounding, compares the rounded value with the original, and if they differ, explicitly sets the XX bit in the FPSCR register (which corresponds to FE_INEXACT). Once LLVM has better modeling of the floating-point environment we should be able to (often) eliminate this extra complexity. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178362 91177308-0d34-0410-b5e6-96231b3b80d8
author: Hal Finkel <hfinkel@anl.gov> 2013-03-29 19:41:55 +0000
committer: Hal Finkel <hfinkel@anl.gov> 2013-03-29 19:41:55 +0000
commit: 0882fd6c4f90f1cbaa4bb6f6ceec289428cca734 (patch)
tree: 53a0c59a3544690e09572552450574f32786f3af
parent: 5114226c1896f250be8881adf67d55a7e54b50fc (diff)
3 files changed, 95 insertions, 0 deletions
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index aadcb45359..0574f047c2 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -172,6 +172,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
     if (TM.Options.UnsafeFPMath) {
       setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
       setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+
+      // These need to set FE_INEXACT, and use a custom inserter.
+      setOperationAction(ISD::FRINT, MVT::f64, Legal);
+      setOperationAction(ISD::FRINT, MVT::f32, Legal);
     }
   }
 
@@ -6524,6 +6528,51 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
 
     // Restore FPSCR value.
     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
+  } else if (MI->getOpcode() == PPC::FRINDrint ||
+             MI->getOpcode() == PPC::FRINSrint) {
+    bool isf32 = MI->getOpcode() == PPC::FRINSrint;
+    unsigned Dest = MI->getOperand(0).getReg();
+    unsigned Src = MI->getOperand(1).getReg();
+    DebugLoc dl   = MI->getDebugLoc();
+
+    MachineRegisterInfo &RegInfo = F->getRegInfo();
+    unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+
+    // Perform the rounding.
+    BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest)
+      .addReg(Src);
+
+    // Compare the results.
+    BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg)
+      .addReg(Dest).addReg(Src);
+
+    // If the results were not equal, then set the FPSCR XX bit.
+    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+    F->insert(It, midMBB);
+    F->insert(It, exitMBB);
+    exitMBB->splice(exitMBB->begin(), BB,
+                    llvm::next(MachineBasicBlock::iterator(MI)),
+                    BB->end());
+    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+    BuildMI(*BB, MI, dl, TII->get(PPC::BCC))
+      .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB);
+
+    BB->addSuccessor(midMBB);
+    BB->addSuccessor(exitMBB);
+
+    BB = midMBB;
+
+    // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set
+    // the FI bit here because that will not automatically set XX also,
+    // and XX is what libm interprets as the FE_INEXACT flag.
+    BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6);
+    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+
+    BB->addSuccessor(exitMBB);
+
+    BB = exitMBB;
   } else {
     llvm_unreachable("Unexpected instr type to insert");
   }
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index af3b57f2ed..f897a47800 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1141,6 +1141,15 @@ let Uses = [RM] in {
                         "frin $frD, $frB", FPGeneral,
                         [(set f32:$frD, (fnearbyint f32:$frB))]>;
 
+  // These pseudos expand to rint but also set FE_INEXACT when the result does
+  // not equal the argument.
+  let usesCustomInserter = 1, Defs = [RM] in { // FIXME: Model FPSCR!
+    def FRINDrint : Pseudo<(outs F8RC:$frD), (ins F8RC:$frB),
+                            "#FRINDrint", [(set f64:$frD, (frint f64:$frB))]>;
+    def FRINSrint : Pseudo<(outs F4RC:$frD), (ins F4RC:$frB),
+                            "#FRINSrint", [(set f32:$frD, (frint f32:$frB))]>;
+  }
+
   def FRIPD  : XForm_26<63, 456, (outs F8RC:$frD), (ins F8RC:$frB),
                         "frip $frD, $frB", FPGeneral,
                         [(set f64:$frD, (fceil f64:$frB))]>;
diff --git a/test/CodeGen/PowerPC/rounding-ops.ll b/test/CodeGen/PowerPC/rounding-ops.ll
index 8177a48a60..b210a6bda8 100644
--- a/test/CodeGen/PowerPC/rounding-ops.ll
+++ b/test/CodeGen/PowerPC/rounding-ops.ll
@@ -106,3 +106,40 @@ define double @test10(double %x) nounwind  {
 }
 
 declare double @trunc(double) nounwind readnone
+
+define float @test11(float %x) nounwind  {
+  %call = tail call float @rintf(float %x) nounwind readnone
+  ret float %call
+
+; CHECK: test11:
+; CHECK-NOT: frin
+
+; CHECK-FM: test11:
+; CHECK-FM: frin [[R2:[0-9]+]], [[R1:[0-9]+]]
+; CHECK-FM: fcmpu [[CR:[0-9]+]], [[R2]], [[R1]]
+; CHECK-FM: beq [[CR]], .LBB[[BB:[0-9]+]]_2
+; CHECK-FM: mtfsb1 6
+; CHECK-FM: .LBB[[BB]]_2:
+; CHECK-FM: blr
+}
+
+declare float @rintf(float) nounwind readnone
+
+define double @test12(double %x) nounwind  {
+  %call = tail call double @rint(double %x) nounwind readnone
+  ret double %call
+
+; CHECK: test12:
+; CHECK-NOT: frin
+
+; CHECK-FM: test12:
+; CHECK-FM: frin [[R2:[0-9]+]], [[R1:[0-9]+]]
+; CHECK-FM: fcmpu [[CR:[0-9]+]], [[R2]], [[R1]]
+; CHECK-FM: beq [[CR]], .LBB[[BB:[0-9]+]]_2
+; CHECK-FM: mtfsb1 6
+; CHECK-FM: .LBB[[BB]]_2:
+; CHECK-FM: blr
+}
+
+declare double @rint(double) nounwind readnone
+
author	Hal Finkel <hfinkel@anl.gov>	2013-03-29 19:41:55 +0000
committer	Hal Finkel <hfinkel@anl.gov>	2013-03-29 19:41:55 +0000
commit	0882fd6c4f90f1cbaa4bb6f6ceec289428cca734 (patch)
tree	53a0c59a3544690e09572552450574f32786f3af
parent	5114226c1896f250be8881adf67d55a7e54b50fc (diff)