author     Dale Johannesen <dalej@apple.com>    2007-05-17 21:31:21 +0000
committer  Dale Johannesen <dalej@apple.com>    2007-05-17 21:31:21 +0000
commit     8dd86c14d46b2deb1e1335d495acc579d70bf60f
tree       5e8ef16820f3ae767b3057a3cc1a0f46b4f2dfdc
parent     e99d492a15c3aa068293964ec22be4190061784b
More effective breakdown of memcpy into repeated load/store operations. These are now
emitted in the order lod;lod;lod;sto;sto;sto, which gives the load-store optimizer
a better chance of producing ldm/stm. Ideally you would also get cooperation
from the register allocator, but that is not there yet.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37179 91177308-0d34-0410-b5e6-96231b3b80d8
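
Grouping the loads ahead of the stores matters because the load-store optimizer can only merge neighbouring loads (or neighbouring stores) to consecutive addresses into a single ldm/stm. The following is a minimal, self-contained C++ sketch of that emission order for a word-aligned copy; copyWordsBatched and its batch size are illustrative stand-ins, not part of this commit, and in the real lowering the "loads" and "stores" are SelectionDAG nodes rather than direct memory accesses.

    #include <algorithm>
    #include <cstdint>

    // Illustrative only: copy 4-byte words in batches, issuing every load of a
    // batch before any of its stores -- the lod;lod;lod;sto;sto;sto order the
    // commit message describes.
    static void copyWordsBatched(uint32_t *Dst, const uint32_t *Src,
                                 unsigned NumWords) {
      const unsigned BatchSize = 6;            // mirrors MAX_LOADS_IN_LDM below
      uint32_t Loaded[BatchSize];
      unsigned Emitted = 0;
      while (Emitted < NumWords) {
        unsigned N = std::min(BatchSize, NumWords - Emitted);
        for (unsigned i = 0; i < N; ++i)       // all loads of the batch first
          Loaded[i] = Src[Emitted + i];
        for (unsigned i = 0; i < N; ++i)       // then all stores of the batch
          Dst[Emitted + i] = Loaded[i];
        Emitted += N;
      }
    }

Emitting the operations interleaved (load;store;load;store;...) would leave the merger with nothing adjacent to combine, which is presumably what the generic expansion produced before this change.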
-rw-r--r--   lib/Target/ARM/ARMISelLowering.cpp   77
-rw-r--r--   lib/Target/ARM/ARMISelLowering.h       1
2 files changed, 77 insertions(+), 1 deletion(-)
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index d059db1acf..d30a288d19 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -187,7 +187,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
 
   // Expand mem operations genericly.
   setOperationAction(ISD::MEMSET  , MVT::Other, Expand);
-  setOperationAction(ISD::MEMCPY  , MVT::Other, Expand);
+  setOperationAction(ISD::MEMCPY  , MVT::Other, Custom);
   setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
 
   // Use the default implementation.
@@ -255,6 +255,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
 
   setSchedulingPreference(SchedulingForRegPressure);
   computeRegisterProperties();
+
+  maxStoresPerMemcpy = 1;   //// temporary - rewrite interface to use type
 }
@@ -1328,6 +1330,78 @@ static SDOperand LowerSRx(SDOperand Op, SelectionDAG &DAG,
   return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
 }
 
+SDOperand ARMTargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
+  SDOperand Chain = Op.getOperand(0);
+  SDOperand Dest = Op.getOperand(1);
+  SDOperand Src = Op.getOperand(2);
+  SDOperand Count = Op.getOperand(3);
+  unsigned Align =
+    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
+  if (Align == 0) Align = 1;
+
+  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Count);
+  // Just call memcpy if:
+  //   not 4-byte aligned
+  //   size is unknown
+  //   size is >= the threshold.
+  if ((Align & 3) != 0 ||
+       !I ||
+       I->getValue() >= 64 ||
+       (I->getValue() & 3) != 0) {
+    MVT::ValueType IntPtr = getPointerTy();
+    TargetLowering::ArgListTy Args;
+    TargetLowering::ArgListEntry Entry;
+    Entry.Ty = getTargetData()->getIntPtrType();
+    Entry.Node = Op.getOperand(1); Args.push_back(Entry);
+    Entry.Node = Op.getOperand(2); Args.push_back(Entry);
+    Entry.Node = Op.getOperand(3); Args.push_back(Entry);
+    std::pair<SDOperand,SDOperand> CallResult =
+      LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
+                  DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
+    return CallResult.second;
+  }
+
+  // Otherwise do repeated 4-byte loads and stores.  To be improved.
+  assert((I->getValue() & 3) == 0);
+  assert((Align & 3) == 0);
+  unsigned NumMemOps = I->getValue() >> 2;
+  unsigned EmittedNumMemOps = 0;
+  unsigned SrcOff = 0, DstOff = 0;
+  MVT::ValueType VT = MVT::i32;
+  unsigned VTSize = 4;
+  const int MAX_LOADS_IN_LDM = 6;
+  SDOperand LoadChains[MAX_LOADS_IN_LDM];
+  SDOperand Loads[MAX_LOADS_IN_LDM];
+
+  // Emit up to 4 loads, then a TokenFactor barrier, then the same
+  // number of stores.  The loads and stores will get combined into
+  // ldm/stm later on.
+  while(EmittedNumMemOps < NumMemOps) {
+    unsigned int i;
+    for (i=0; i<MAX_LOADS_IN_LDM && EmittedNumMemOps+i < NumMemOps; i++) {
+      Loads[i] = DAG.getLoad(VT, Chain,
+                             DAG.getNode(ISD::ADD, VT, Src,
+                                         DAG.getConstant(SrcOff, VT)),
+                             NULL, 0);
+      LoadChains[i] = Loads[i].getValue(1);
+      SrcOff += VTSize;
+    }
+
+    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, &LoadChains[0], i);
+
+    for (i=0; i<MAX_LOADS_IN_LDM && EmittedNumMemOps+i < NumMemOps; i++) {
+      Chain = DAG.getStore(Chain, Loads[i],
+                           DAG.getNode(ISD::ADD, VT, Dest,
+                                       DAG.getConstant(DstOff, VT)),
+                           NULL, 0);
+      DstOff += VTSize;
+    }
+    EmittedNumMemOps += i;
+  }
+
+  return Chain;
+}
+
 SDOperand ARMTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
   switch (Op.getOpcode()) {
   default: assert(0 && "Don't know how to custom lower this!"); abort();
@@ -1358,6 +1432,7 @@ SDOperand ARMTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
   case ISD::RETURNADDR:    break;
   case ISD::FRAMEADDR:     break;
   case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
+  case ISD::MEMCPY:        return LowerMEMCPY(Op, DAG);
   }
   return SDOperand();
 }
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 8e9ef88989..b0c5e89489 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -135,6 +135,7 @@ namespace llvm {
     SDOperand LowerGLOBAL_OFFSET_TABLE(SDOperand Op, SelectionDAG &DAG);
     SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG);
     SDOperand LowerBR_JT(SDOperand Op, SelectionDAG &DAG);
+    SDOperand LowerMEMCPY(SDOperand Op, SelectionDAG &DAG);
   };
 }
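
For context, the new LowerMEMCPY only expands copies that are 4-byte aligned, have a known constant size below the 64-byte threshold, and copy a multiple of 4 bytes; anything else still becomes a call to memcpy. A hedged source-level illustration follows; the Packet struct and examples function are hypothetical, and the comments assume the front end forwards the natural alignment and a constant size to the MEMCPY node.

    #include <cstring>

    struct Packet { unsigned Words[6]; };     // 24 bytes, 4-byte aligned

    // Hypothetical call sites and how this lowering would treat them.
    void examples(Packet &A, const Packet &B, char *P, const char *Q, unsigned N) {
      std::memcpy(&A, &B, sizeof(Packet));  // aligned, 24 bytes: expanded inline
                                            // as one batch of 6 loads, then 6 stores
      std::memcpy(P, Q, 7);                 // not 4-byte aligned and size not a
                                            // multiple of 4: library call
      std::memcpy(P, Q, N);                 // size unknown at compile time: library call
      std::memcpy(P, Q, 128);               // 128 >= 64-byte threshold: library call
    }

With maxStoresPerMemcpy forced to 1, the generic expansion is effectively bypassed, so every eligible memcpy reaches this custom lowering.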