diff options
author | Hal Finkel <hfinkel@anl.gov> | 2013-03-31 10:12:51 +0000 |
---|---|---|
committer | Hal Finkel <hfinkel@anl.gov> | 2013-03-31 10:12:51 +0000 |
commit | 8049ab15e4b638a07d6f230329945c2310eca27b (patch) | |
tree | 89ddcf3d670a9c7f4fafe2db694222cabdd48485 | |
parent | 9ad0f4907b3ba0916a8b6cdb95d298d2ddb7d405 (diff) |
Add the PPC lfiwax instruction
This instruction is available on modern PPC64 CPUs, and is now used
to improve the SINT_TO_FP lowering: the i32 value is stored directly and
loaded with sign extension, which eliminates the separate sign-extension
instruction and shrinks the required stack slot from 8 bytes to 4.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178446 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/PowerPC/PPC.td | 28 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.cpp | 43 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.h | 5 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCInstrInfo.td | 10 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCRegisterInfo.cpp | 1 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCSubtarget.cpp | 1 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCSubtarget.h | 2 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/i32-to-float.ll | 14 |
8 files changed, 82 insertions, 22 deletions
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 01cd55ee27..602e33cd29 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -59,6 +59,8 @@ def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true", "Enable the fsqrt instruction">; def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true", "Enable the stfiwx instruction">; +def FeatureLFIWAX : SubtargetFeature<"lfiwax","HasLFIWAX", "true", + "Enable the lfiwax instruction">; def FeatureFPRND : SubtargetFeature<"fprnd", "HasFPRND", "true", "Enable the fri[mnpz] instructions">; def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true", @@ -80,7 +82,6 @@ def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", // FLT_CVT p7 fcfids, fcfidu, fcfidus, fcfiduz, fctiwuz // FRE p5 through p7 fre (vs. fres, available since p3) // FRSQRTES p5 through p7 frsqrtes (vs. frsqrte, available since p3) -// LFIWAX p6, p6x, p7 lfiwax // LFIWZX p7 lfiwzx // POPCNTB p5 through p7 popcntb and related instructions // RECIP_PREC p6, p6x, p7 higher precision reciprocal estimates @@ -133,14 +134,15 @@ def : ProcessorModel<"e5500", PPCE5500Model, FeatureSTFIWX, FeatureBookE, FeatureISEL]>; def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, FeatureMFOCRF, - FeatureFSqrt, FeatureSTFIWX, FeatureFPRND, - FeatureISEL, FeaturePOPCNTD, FeatureLDBRX, - Feature64Bit /*, Feature64BitRegs */]>; + FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, FeatureISEL, FeaturePOPCNTD, + FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */]>; def : Processor<"a2q", PPCA2Itineraries, [DirectiveA2, FeatureBookE, FeatureMFOCRF, - FeatureFSqrt, FeatureSTFIWX, FeatureFPRND, - FeatureISEL, FeaturePOPCNTD, FeatureLDBRX, - Feature64Bit /*, Feature64BitRegs */, FeatureQPX]>; + FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, FeatureISEL, FeaturePOPCNTD, + FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */, + FeatureQPX]>; def : Processor<"pwr3", G5Itineraries, [DirectivePwr3, 
FeatureAltivec, FeatureMFOCRF, FeatureSTFIWX, Feature64Bit]>; @@ -157,16 +159,18 @@ def : Processor<"pwr5x", G5Itineraries, def : Processor<"pwr6", G5Itineraries, [DirectivePwr6, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, - FeatureFPRND, Feature64Bit /*, Feature64BitRegs */]>; + FeatureLFIWAX, FeatureFPRND, Feature64Bit + /*, Feature64BitRegs */]>; def : Processor<"pwr6x", G5Itineraries, [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, - FeatureFSqrt, FeatureSTFIWX, FeatureFPRND, - Feature64Bit]>; + FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, Feature64Bit]>; def : Processor<"pwr7", G5Itineraries, [DirectivePwr7, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, - FeatureFPRND, FeatureISEL, FeaturePOPCNTD, - FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */]>; + FeatureLFIWAX, FeatureFPRND, FeatureISEL, + FeaturePOPCNTD, FeatureLDBRX, Feature64Bit + /*, Feature64BitRegs */]>; def : Processor<"ppc", G3Itineraries, [Directive32]>; def : Processor<"ppc64", G5Itineraries, [Directive64, FeatureAltivec, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index bf31029a1d..12269212af 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -4809,20 +4809,43 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, // then lfd it and fcfid it. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *FrameInfo = MF.getFrameInfo(); - int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, - Op.getOperand(0)); + SDValue Ld; + if (PPCSubTarget.hasLFIWAX()) { + int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); + SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - // STD the extended value into the stack slot. 
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx, - MachinePointerInfo(), false, false, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, + MachinePointerInfo::getFixedStack(FrameIdx), + false, false, 0); - // Load the value as a double. - SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(), - false, false, false, 0); + assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && + "Expected an i32 store"); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), + MachineMemOperand::MOLoad, 4, 4); + SDValue Ops[] = { Store, FIdx }; + Ld = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl, + DAG.getVTList(MVT::f64, MVT::Other), Ops, 2, + MVT::i32, MMO); + } else { + int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); + SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); + + SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, + Op.getOperand(0)); + + // STD the extended value into the stack slot. + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx, + MachinePointerInfo::getFixedStack(FrameIdx), + false, false, 0); + + // Load the value as a double. + Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, + MachinePointerInfo::getFixedStack(FrameIdx), + false, false, false, 0); + } // FCFID it and return it. SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index bce05a16c6..a924ac4e00 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -242,6 +242,11 @@ namespace llvm { /// or i32. LBRX, + /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point + /// load which sign-extends from a 32-bit integer value into the + /// destination 64-bit register. 
+ LFIWAX, + /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model, /// produces an ADDIS8 instruction that adds the TOC base register to /// sym@toc@ha. diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index d3d7dc66c9..478e6127ab 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -20,6 +20,10 @@ include "PPCInstrFormats.td" def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx SDTCisVT<0, f64>, SDTCisPtrTy<1> ]>; +def SDT_PPClfiwax : SDTypeProfile<1, 1, [ // lfiwax + SDTCisVT<0, f64>, SDTCisPtrTy<1> +]>; + def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; @@ -63,6 +67,8 @@ def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>; def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>; def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx, [SDNPHasChain, SDNPMayStore]>; +def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwax, + [SDNPHasChain, SDNPMayLoad]>; // Extract FPSCR (not modeled at the DAG level). 
def PPCmffs : SDNode<"PPCISD::MFFS", @@ -843,6 +849,10 @@ def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src), def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src), "lfdx $frD, $src", LdStLFD, [(set f64:$frD, (load xaddr:$src))]>; + +def LFIWAX : XForm_25<31, 855, (outs F8RC:$frD), (ins memrr:$src), + "lfiwax $frD, $src", LdStLFD, + [(set f64:$frD, (PPClfiwax xoaddr:$src))]>; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 482a3bc939..67cf13603b 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -528,6 +528,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, bool noImmForm = false; switch (OpC) { + case PPC::LFIWAX: case PPC::LVEBX: case PPC::LVEHX: case PPC::LVEWX: diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index b793c37de8..57e18ed07e 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -39,6 +39,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, , HasQPX(false) , HasFSQRT(false) , HasSTFIWX(false) + , HasLFIWAX(false) , HasFPRND(false) , HasISEL(false) , HasPOPCNTD(false) diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index bf5bd844c0..3958bc9c00 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -78,6 +78,7 @@ protected: bool HasQPX; bool HasFSQRT; bool HasSTFIWX; + bool HasLFIWAX; bool HasFPRND; bool HasISEL; bool HasPOPCNTD; @@ -158,6 +159,7 @@ public: // Specific obvious features. 
bool hasFSQRT() const { return HasFSQRT; } bool hasSTFIWX() const { return HasSTFIWX; } + bool hasLFIWAX() const { return HasLFIWAX; } bool hasFPRND() const { return HasFPRND; } bool hasAltivec() const { return HasAltivec; } bool hasQPX() const { return HasQPX; } diff --git a/test/CodeGen/PowerPC/i32-to-float.ll b/test/CodeGen/PowerPC/i32-to-float.ll index 0807717e50..bed940c5de 100644 --- a/test/CodeGen/PowerPC/i32-to-float.ll +++ b/test/CodeGen/PowerPC/i32-to-float.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck -check-prefix=CHECK-A2 %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -14,6 +15,13 @@ entry: ; CHECK: fcfid [[REG3:[0-9]+]], [[REG2]] ; CHECK: frsp 1, [[REG3]] ; CHECK: blr + +; CHECK-A2: @foo +; CHECK-A2: stw 3, +; CHECK-A2: lfiwax [[REG:[0-9]+]], +; CHECK-A2: fcfid [[REG2:[0-9]+]], [[REG]] +; CHECK-A2: frsp 1, [[REG2]] +; CHECK-A2: blr } define double @goo(i32 %a) nounwind { @@ -27,5 +35,11 @@ entry: ; CHECK: lfd [[REG2:[0-9]+]], ; CHECK: fcfid 1, [[REG2]] ; CHECK: blr + +; CHECK-A2: @goo +; CHECK-A2: stw 3, +; CHECK-A2: lfiwax [[REG:[0-9]+]], +; CHECK-A2: fcfid 1, [[REG]] +; CHECK-A2: blr } |