aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp89
-rw-r--r--test/CodeGen/PowerPC/stdux-constuse.ll47
2 files changed, 136 insertions, 0 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d694bc782c..472919c3e3 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6917,6 +6917,16 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
ISD::MemIndexedMode AM = ISD::UNINDEXED;
if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
return false;
+
+ // Backends without true r+i pre-indexed forms may need to pass a
+ // constant base with a variable offset so that constant coercion
+ // will work with the patterns in canonical form.
+ bool Swapped = false;
+ if (isa<ConstantSDNode>(BasePtr)) {
+ std::swap(BasePtr, Offset);
+ Swapped = true;
+ }
+
// Don't create a indexed load / store with zero offset.
if (isa<ConstantSDNode>(Offset) &&
cast<ConstantSDNode>(Offset)->isNullValue())
@@ -6942,6 +6952,48 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
return false;
}
+ // If the offset is a constant, there may be other adds of constants that
+ // can be folded with this one. We should do this to avoid having to keep
+ // a copy of the original base pointer.
+ SmallVector<SDNode *, 16> OtherUses;
+ if (isa<ConstantSDNode>(Offset))
+ for (SDNode::use_iterator I = BasePtr.getNode()->use_begin(),
+ E = BasePtr.getNode()->use_end(); I != E; ++I) {
+ SDNode *Use = *I;
+ if (Use == Ptr.getNode())
+ continue;
+
+ if (Use->isPredecessorOf(N))
+ continue;
+
+ if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) {
+ OtherUses.clear();
+ break;
+ }
+
+ SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1);
+ if (Op1.getNode() == BasePtr.getNode())
+ std::swap(Op0, Op1);
+ assert(Op0.getNode() == BasePtr.getNode() &&
+ "Use of ADD/SUB but not an operand");
+
+ if (!isa<ConstantSDNode>(Op1)) {
+ OtherUses.clear();
+ break;
+ }
+
+ // FIXME: In some cases, we can be smarter about this.
+ if (Op1.getValueType() != Offset.getValueType()) {
+ OtherUses.clear();
+ break;
+ }
+
+ OtherUses.push_back(Use);
+ }
+
+ if (Swapped)
+ std::swap(BasePtr, Offset);
+
// Now check for #3 and #4.
bool RealUse = false;
@@ -6991,6 +7043,43 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// Finally, since the node is now dead, remove it from the graph.
DAG.DeleteNode(N);
+ if (Swapped)
+ std::swap(BasePtr, Offset);
+
+ // Replace other uses of BasePtr that can be updated to use Ptr
+ for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
+ unsigned OffsetIdx = 1;
+ if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
+ OffsetIdx = 0;
+ assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
+ BasePtr.getNode() && "Expected BasePtr operand");
+
+ APInt OV =
+ cast<ConstantSDNode>(Offset)->getAPIntValue();
+ if (AM == ISD::PRE_DEC)
+ OV = -OV;
+
+ ConstantSDNode *CN =
+ cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
+ APInt CNV = CN->getAPIntValue();
+ if (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1)
+ CNV += OV;
+ else
+ CNV -= OV;
+
+ SDValue NewOp1 = Result.getValue(isLoad ? 1 : 0);
+ SDValue NewOp2 = DAG.getConstant(CNV, CN->getValueType(0));
+ if (OffsetIdx == 0)
+ std::swap(NewOp1, NewOp2);
+
+ SDValue NewUse = DAG.getNode(OtherUses[i]->getOpcode(),
+ OtherUses[i]->getDebugLoc(),
+ OtherUses[i]->getValueType(0), NewOp1, NewOp2);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
+ removeFromWorkList(OtherUses[i]);
+ DAG.DeleteNode(OtherUses[i]);
+ }
+
// Replace the uses of Ptr with uses of the updated base value.
DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
removeFromWorkList(Ptr.getNode());
diff --git a/test/CodeGen/PowerPC/stdux-constuse.ll b/test/CodeGen/PowerPC/stdux-constuse.ll
new file mode 100644
index 0000000000..e62d438014
--- /dev/null
+++ b/test/CodeGen/PowerPC/stdux-constuse.ll
@@ -0,0 +1,47 @@
+; RUN: llc -mcpu=a2 -disable-lsr < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @test1(i64 %add, i64* %ptr) nounwind {
+entry:
+ %p1 = getelementptr i64* %ptr, i64 144115188075855
+ br label %for.cond2.preheader
+
+for.cond2.preheader:
+ %nl.018 = phi i32 [ 0, %entry ], [ %inc9, %for.end ]
+ br label %for.body4
+
+for.body4:
+ %lsr.iv = phi i32 [ %lsr.iv.next, %for.body4 ], [ 16000, %for.cond2.preheader ]
+ %i0 = phi i64* [ %p1, %for.cond2.preheader ], [ %i6, %for.body4 ]
+ %i6 = getelementptr i64* %i0, i64 400000
+ %i7 = getelementptr i64* %i6, i64 300000
+ %i8 = getelementptr i64* %i6, i64 200000
+ %i9 = getelementptr i64* %i6, i64 100000
+ store i64 %add, i64* %i6, align 32
+ store i64 %add, i64* %i7, align 32
+ store i64 %add, i64* %i8, align 32
+ store i64 %add, i64* %i9, align 32
+ %lsr.iv.next = add i32 %lsr.iv, -16
+ %exitcond.15 = icmp eq i32 %lsr.iv.next, 0
+ br i1 %exitcond.15, label %for.end, label %for.body4
+
+; Make sure that we generate the most compact form of this loop with no
+; unnecessary moves
+; CHECK: @test1
+; CHECK: mtctr
+; CHECK: stdux
+; CHECK-NEXT: stdx
+; CHECK-NEXT: stdx
+; CHECK-NEXT: stdx
+; CHECK-NEXT: bdnz
+
+for.end:
+ %inc9 = add nsw i32 %nl.018, 1
+ %exitcond = icmp eq i32 %inc9, 400000
+ br i1 %exitcond, label %for.end10, label %for.cond2.preheader
+
+for.end10:
+ ret i32 0
+}
+