diff options
author | Hal Finkel <hfinkel@anl.gov> | 2013-03-29 08:57:48 +0000 |
---|---|---|
committer | Hal Finkel <hfinkel@anl.gov> | 2013-03-29 08:57:48 +0000 |
commit | f5d5c434606161fb017a34cb656fa4aa5a3e076b (patch) | |
tree | 4641a3e130c65183f13495d0228b9454e7bf7b60 | |
parent | ef484a376cce3729b45ad86eab5724aa83a61823 (diff) |
Add PPC FP rounding instructions fri[mnpz]
These instructions are available on the P5x (and later) and on the A2. They
implement the standard floating-point rounding operations (floor, trunc, etc.).
One caveat: frin (round to nearest) does not implement "ties to even", and so
is only enabled in fast-math mode.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178337 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/PowerPC/PPC.td | 27 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.cpp | 17 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCInstrInfo.td | 29 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCSubtarget.cpp | 1 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCSubtarget.h | 2 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/rounding-ops.ll | 108 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/vec_rounding.ll | 36 |
7 files changed, 190 insertions, 30 deletions
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 27f7157a28..01cd55ee27 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -59,6 +59,8 @@ def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true", "Enable the fsqrt instruction">; def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true", "Enable the stfiwx instruction">; +def FeatureFPRND : SubtargetFeature<"fprnd", "HasFPRND", "true", + "Enable the fri[mnpz] instructions">; def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true", "Enable the isel instruction">; def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true", @@ -76,7 +78,6 @@ def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", // CMPB p6, p6x, p7 cmpb // DFP p6, p6x, p7 decimal floating-point instructions // FLT_CVT p7 fcfids, fcfidu, fcfidus, fcfiduz, fctiwuz -// FPRND p5x, p6, p6x, p7 frim, frin, frip, friz // FRE p5 through p7 fre (vs. fres, available since p3) // FRSQRTES p5 through p7 frsqrtes (vs. frsqrte, available since p3) // LFIWAX p6, p6x, p7 lfiwax @@ -132,14 +133,14 @@ def : ProcessorModel<"e5500", PPCE5500Model, FeatureSTFIWX, FeatureBookE, FeatureISEL]>; def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, FeatureMFOCRF, - FeatureFSqrt, FeatureSTFIWX, FeatureISEL, - FeaturePOPCNTD, FeatureLDBRX, Feature64Bit - /*, Feature64BitRegs */]>; + FeatureFSqrt, FeatureSTFIWX, FeatureFPRND, + FeatureISEL, FeaturePOPCNTD, FeatureLDBRX, + Feature64Bit /*, Feature64BitRegs */]>; def : Processor<"a2q", PPCA2Itineraries, [DirectiveA2, FeatureBookE, FeatureMFOCRF, - FeatureFSqrt, FeatureSTFIWX, FeatureISEL, - FeaturePOPCNTD, FeatureLDBRX, Feature64Bit - /*, Feature64BitRegs */, FeatureQPX]>; + FeatureFSqrt, FeatureSTFIWX, FeatureFPRND, + FeatureISEL, FeaturePOPCNTD, FeatureLDBRX, + Feature64Bit /*, Feature64BitRegs */, FeatureQPX]>; def : Processor<"pwr3", G5Itineraries, [DirectivePwr3, FeatureAltivec, FeatureMFOCRF, FeatureSTFIWX, Feature64Bit]>; @@ -151,19 +152,21 @@ def : Processor<"pwr5", G5Itineraries, FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>; def : Processor<"pwr5x", G5Itineraries, [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, - FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>; + FeatureFSqrt, FeatureSTFIWX, FeatureFPRND, + Feature64Bit]>; def : Processor<"pwr6", G5Itineraries, [DirectivePwr6, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, - Feature64Bit /*, Feature64BitRegs */]>; + FeatureFPRND, Feature64Bit /*, Feature64BitRegs */]>; def : Processor<"pwr6x", G5Itineraries, [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, - FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>; + FeatureFSqrt, FeatureSTFIWX, FeatureFPRND, + Feature64Bit]>; def : Processor<"pwr7", G5Itineraries, [DirectivePwr7, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, - FeatureISEL, FeaturePOPCNTD, FeatureLDBRX, - Feature64Bit /*, Feature64BitRegs */]>; + FeatureFPRND, FeatureISEL, FeaturePOPCNTD, + FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */]>; def : Processor<"ppc", G3Itineraries, [Directive32]>; def : Processor<"ppc64", G5Itineraries, [Directive64, FeatureAltivec, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 74e811b35c..84af0bbb31 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -158,6 +158,23 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + if (Subtarget->hasFPRND()) { + setOperationAction(ISD::FFLOOR, MVT::f64, Legal); + setOperationAction(ISD::FCEIL, MVT::f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + + setOperationAction(ISD::FFLOOR, MVT::f32, Legal); + setOperationAction(ISD::FCEIL, MVT::f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + + // frin does not implement "ties to even." Thus, this is safe only in + // fast-math mode. + if (TM.Options.UnsafeFPMath) { + setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); + } + } + // PowerPC does not have BSWAP, CTPOP or CTTZ setOperationAction(ISD::BSWAP, MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Expand); diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index ea0be97e0d..af3b57f2ed 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1128,9 +1128,38 @@ let Uses = [RM] in { def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB), "fctiwz $frD, $frB", FPGeneral, [(set f64:$frD, (PPCfctiwz f64:$frB))]>; + def FRSP : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB), "frsp $frD, $frB", FPGeneral, [(set f32:$frD, (fround f64:$frB))]>; + + // The frin -> nearbyint mapping is valid only in fast-math mode. + def FRIND : XForm_26<63, 392, (outs F8RC:$frD), (ins F8RC:$frB), + "frin $frD, $frB", FPGeneral, + [(set f64:$frD, (fnearbyint f64:$frB))]>; + def FRINS : XForm_26<63, 392, (outs F4RC:$frD), (ins F4RC:$frB), + "frin $frD, $frB", FPGeneral, + [(set f32:$frD, (fnearbyint f32:$frB))]>; + + def FRIPD : XForm_26<63, 456, (outs F8RC:$frD), (ins F8RC:$frB), + "frip $frD, $frB", FPGeneral, + [(set f64:$frD, (fceil f64:$frB))]>; + def FRIPS : XForm_26<63, 456, (outs F4RC:$frD), (ins F4RC:$frB), + "frip $frD, $frB", FPGeneral, + [(set f32:$frD, (fceil f32:$frB))]>; + def FRIZD : XForm_26<63, 424, (outs F8RC:$frD), (ins F8RC:$frB), + "friz $frD, $frB", FPGeneral, + [(set f64:$frD, (ftrunc f64:$frB))]>; + def FRIZS : XForm_26<63, 424, (outs F4RC:$frD), (ins F4RC:$frB), + "friz $frD, $frB", FPGeneral, + [(set f32:$frD, (ftrunc f32:$frB))]>; + def FRIMD : XForm_26<63, 488, (outs F8RC:$frD), (ins F8RC:$frB), + "frim $frD, $frB", FPGeneral, + [(set f64:$frD, (ffloor f64:$frB))]>; + def FRIMS : XForm_26<63, 488, (outs F4RC:$frD), (ins F4RC:$frB), + "frim $frD, $frB", FPGeneral, + [(set f32:$frD, (ffloor f32:$frB))]>; + def FSQRT : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB), "fsqrt $frD, $frB", FPSqrt, [(set f64:$frD, (fsqrt f64:$frB))]>; diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index be64700d8c..b793c37de8 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -39,6 +39,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, , HasQPX(false) , HasFSQRT(false) , HasSTFIWX(false) + , HasFPRND(false) , HasISEL(false) , HasPOPCNTD(false) , HasLDBRX(false) diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 36436f6e40..bf5bd844c0 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -78,6 +78,7 @@ protected: bool HasQPX; bool HasFSQRT; bool HasSTFIWX; + bool HasFPRND; bool HasISEL; bool HasPOPCNTD; bool HasLDBRX; @@ -157,6 +158,7 @@ public: // Specific obvious features. bool hasFSQRT() const { return HasFSQRT; } bool hasSTFIWX() const { return HasSTFIWX; } + bool hasFPRND() const { return HasFPRND; } bool hasAltivec() const { return HasAltivec; } bool hasQPX() const { return HasQPX; } bool hasMFOCRF() const { return HasMFOCRF; } diff --git a/test/CodeGen/PowerPC/rounding-ops.ll b/test/CodeGen/PowerPC/rounding-ops.ll new file mode 100644 index 0000000000..8177a48a60 --- /dev/null +++ b/test/CodeGen/PowerPC/rounding-ops.ll @@ -0,0 +1,108 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math | FileCheck -check-prefix=CHECK-FM %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define float @test1(float %x) nounwind { + %call = tail call float @floorf(float %x) nounwind readnone + ret float %call + +; CHECK: test1: +; CHECK: frim 1, 1 + +; CHECK-FM: test1: +; CHECK-FM: frim 1, 1 +} + +declare float @floorf(float) nounwind readnone + +define double @test2(double %x) nounwind { + %call = tail call double @floor(double %x) nounwind readnone + ret double %call + +; CHECK: test2: +; CHECK: frim 1, 1 + +; CHECK-FM: test2: +; CHECK-FM: frim 1, 1 +} + +declare double @floor(double) nounwind readnone + +define float @test3(float %x) nounwind { + %call = tail call float @nearbyintf(float %x) nounwind readnone + ret float %call + +; CHECK: test3: +; CHECK-NOT: frin + +; CHECK-FM: test3: +; CHECK-FM: frin 1, 1 +} + +declare float @nearbyintf(float) nounwind readnone + +define double @test4(double %x) nounwind { + %call = tail call double @nearbyint(double %x) nounwind readnone + ret double %call + +; CHECK: test4: +; CHECK-NOT: frin + +; CHECK-FM: test4: +; CHECK-FM: frin 1, 1 +} + +declare double @nearbyint(double) nounwind readnone + +define float @test5(float %x) nounwind { + %call = tail call float @ceilf(float %x) nounwind readnone + ret float %call + +; CHECK: test5: +; CHECK: frip 1, 1 + +; CHECK-FM: test5: +; CHECK-FM: frip 1, 1 +} + +declare float @ceilf(float) nounwind readnone + +define double @test6(double %x) nounwind { + %call = tail call double @ceil(double %x) nounwind readnone + ret double %call + +; CHECK: test6: +; CHECK: frip 1, 1 + +; CHECK-FM: test6: +; CHECK-FM: frip 1, 1 +} + +declare double @ceil(double) nounwind readnone + +define float @test9(float %x) nounwind { + %call = tail call float @truncf(float %x) nounwind readnone + ret float %call + +; CHECK: test9: +; CHECK: friz 1, 1 + +; CHECK-FM: test9: +; CHECK-FM: friz 1, 1 +} + +declare float @truncf(float) nounwind readnone + +define double @test10(double %x) nounwind { + %call = tail call double @trunc(double %x) nounwind readnone + ret double %call + +; CHECK: test10: +; CHECK: friz 1, 1 + +; CHECK-FM: test10: +; CHECK-FM: friz 1, 1 +} + +declare double @trunc(double) nounwind readnone diff --git a/test/CodeGen/PowerPC/vec_rounding.ll b/test/CodeGen/PowerPC/vec_rounding.ll index f41faa0339..7c55638620 100644 --- a/test/CodeGen/PowerPC/vec_rounding.ll +++ b/test/CodeGen/PowerPC/vec_rounding.ll @@ -13,8 +13,8 @@ define <2 x double> @floor_v2f64(<2 x double> %p) ret <2 x double> %t } ; CHECK: floor_v2f64: -; CHECK: bl floor -; CHECK: bl floor +; CHECK: frim +; CHECK: frim declare <4 x double> @llvm.floor.v4f64(<4 x double> %p) define <4 x double> @floor_v4f64(<4 x double> %p) @@ -23,10 +23,10 @@ define <4 x double> @floor_v4f64(<4 x double> %p) ret <4 x double> %t } ; CHECK: floor_v4f64: -; CHECK: bl floor -; CHECK: bl floor -; CHECK: bl floor -; CHECK: bl floor +; CHECK: frim +; CHECK: frim +; CHECK: frim +; CHECK: frim declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p) define <2 x double> @ceil_v2f64(<2 x double> %p) @@ -35,8 +35,8 @@ define <2 x double> @ceil_v2f64(<2 x double> %p) ret <2 x double> %t } ; CHECK: ceil_v2f64: -; CHECK: bl ceil -; CHECK: bl ceil +; CHECK: frip +; CHECK: frip declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p) define <4 x double> @ceil_v4f64(<4 x double> %p) @@ -45,10 +45,10 @@ define <4 x double> @ceil_v4f64(<4 x double> %p) ret <4 x double> %t } ; CHECK: ceil_v4f64: -; CHECK: bl ceil -; CHECK: bl ceil -; CHECK: bl ceil -; CHECK: bl ceil +; CHECK: frip +; CHECK: frip +; CHECK: frip +; CHECK: frip declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p) define <2 x double> @trunc_v2f64(<2 x double> %p) @@ -57,8 +57,8 @@ define <2 x double> @trunc_v2f64(<2 x double> %p) ret <2 x double> %t } ; CHECK: trunc_v2f64: -; CHECK: bl trunc -; CHECK: bl trunc +; CHECK: friz +; CHECK: friz declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p) define <4 x double> @trunc_v4f64(<4 x double> %p) @@ -67,10 +67,10 @@ define <4 x double> @trunc_v4f64(<4 x double> %p) ret <4 x double> %t } ; CHECK: trunc_v4f64: -; CHECK: bl trunc -; CHECK: bl trunc -; CHECK: bl trunc -; CHECK: bl trunc +; CHECK: friz +; CHECK: friz +; CHECK: friz +; CHECK: friz declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) define <2 x double> @nearbyint_v2f64(<2 x double> %p) |