diff options
-rw-r--r-- | include/llvm/Target/TargetLowering.h | 7 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 65 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 5 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.h | 6 | ||||
-rw-r--r-- | test/CodeGen/ARM/ldst-f32-2-i32.ll | 28 |
5 files changed, 109 insertions, 2 deletions
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index c81d1f76d0..5141b7b562 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -963,6 +963,13 @@ public: return isTypeLegal(VT); } + /// isDesirableToTransformToIntegerOp - Return true if it is profitable for dag combiner + /// to transform a floating point op of specified opcode to an equivalent op of + /// an integer type. e.g. f32 load -> i32 load can be profitable on ARM. + virtual bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const { + return false; + } + /// IsDesirableToPromoteOp - This method query the target whether it is /// beneficial for dag combiner to promote the specified node. If true, it /// should return the desired promotion type by reference. diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e91592b242..dd7d56ab9d 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -42,6 +42,7 @@ STATISTIC(NodesCombined , "Number of dag nodes combined"); STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); +STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); namespace { static cl::opt<bool> @@ -234,6 +235,7 @@ namespace { SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); + SDValue TransformFPLoadStorePair(SDNode *N); SDValue GetDemandedBits(SDValue V, const APInt &Mask); @@ -6111,6 +6113,63 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { return SDValue(); } +/// TransformFPLoadStorePair - For a given floating point load / store pair, +/// if the load value isn't used by any other operations, then consider +/// transforming the pair to integer load / store 
operations if the target +/// deems the transformation profitable. +SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Chain = ST->getChain(); + SDValue Value = ST->getValue(); + if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) && + Value.hasOneUse() && + Chain == SDValue(Value.getNode(), 1)) { + LoadSDNode *LD = cast<LoadSDNode>(Value); + EVT VT = LD->getMemoryVT(); + if (!VT.isFloatingPoint() || + VT != ST->getMemoryVT() || + LD->isNonTemporal() || + ST->isNonTemporal() || + LD->getPointerInfo().getAddrSpace() != 0 || + ST->getPointerInfo().getAddrSpace() != 0) + return SDValue(); + + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || + !TLI.isOperationLegal(ISD::STORE, IntVT) || + !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || + !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) + return SDValue(); + + unsigned LDAlign = LD->getAlignment(); + unsigned STAlign = ST->getAlignment(); + const Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); + unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy); + if (LDAlign < ABIAlign || STAlign < ABIAlign) + return SDValue(); + + SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(), + LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), + false, false, LDAlign); + + SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(), + NewLD, ST->getBasePtr(), + ST->getPointerInfo(), + false, false, STAlign); + + AddToWorkList(NewLD.getNode()); + AddToWorkList(NewST.getNode()); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1), + &DeadNodes); + ++LdStFP2Int; + return NewST; + } + + return SDValue(); +} + SDValue DAGCombiner::visitSTORE(SDNode *N) { StoreSDNode *ST = cast<StoreSDNode>(N); SDValue Chain = ST->getChain(); @@ -6210,6 +6269,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } } + 
// Try transforming a pair of floating point load / store ops to integer + // load / store ops. + SDValue NewST = TransformFPLoadStorePair(N); + if (NewST.getNode()) + return NewST; + if (CombinerAA) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 50557671ab..92ea6cb0f8 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -5724,6 +5724,11 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); } +bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, + EVT VT) const { + return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); +} + bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { if (!Subtarget->allowsUnalignedMem()) return false; diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 28bf60c8c2..b06b8d3e15 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -213,14 +213,16 @@ namespace llvm { virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, SelectionDAG &DAG) const; - virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - virtual const char *getTargetNodeName(unsigned Opcode) const; virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + + bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const; + /// allowsUnalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses. of the specified type. /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON? 
diff --git a/test/CodeGen/ARM/ldst-f32-2-i32.ll b/test/CodeGen/ARM/ldst-f32-2-i32.ll new file mode 100644 index 0000000000..2d016f6cd4 --- /dev/null +++ b/test/CodeGen/ARM/ldst-f32-2-i32.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s +; Check if the f32 load / store pair are optimized to i32 load / store. +; rdar://8944252 + +define void @t(i32 %width, float* nocapture %src, float* nocapture %dst, i32 %index) nounwind { +; CHECK: t: +entry: + %src6 = bitcast float* %src to i8* + %0 = icmp eq i32 %width, 0 + br i1 %0, label %return, label %bb + +bb: +; CHECK: ldr [[REGISTER:(r[0-9]+)]], [r1], r3 +; CHECK: str [[REGISTER]], [r2], #4 + %j.05 = phi i32 [ %2, %bb ], [ 0, %entry ] + %tmp = mul i32 %j.05, %index + %uglygep = getelementptr i8* %src6, i32 %tmp + %src_addr.04 = bitcast i8* %uglygep to float* + %dst_addr.03 = getelementptr float* %dst, i32 %j.05 + %1 = load float* %src_addr.04, align 4 + store float %1, float* %dst_addr.03, align 4 + %2 = add i32 %j.05, 1 + %exitcond = icmp eq i32 %2, %width + br i1 %exitcond, label %return, label %bb + +return: + ret void +} |