diff options
author | Arnold Schwaighofer <aschwaighofer@apple.com> | 2013-04-01 18:12:58 +0000 |
---|---|---|
committer | Arnold Schwaighofer <aschwaighofer@apple.com> | 2013-04-01 18:12:58 +0000 |
commit | f28a29b776b7dc2b97d09c75d69494f862c216b3 (patch) | |
tree | 2a74a99231f00e6cf3ce08dd2ab9caf9e441e0be | |
parent | 46479197843ecb651adc9417c49bbd1b00acfcb6 (diff) |
Merge load/store sequences with adresses: base + index + offset
We would also like to merge sequences that involve a variable index like in the
example below.
int index = *idx++
int i0 = c[index+0];
int i1 = c[index+1];
b[0] = i0;
b[1] = i1;
By extending the parsing of the base pointer to handle dags that contain a
base, index, and offset we can handle examples like the one above.
The dag for the code above will look something like:
(load (i64 add (i64 copyfromreg %c)
(i64 signextend (i8 load %index))))
(load (i64 add (i64 copyfromreg %c)
(i64 signextend (i32 add (i32 signextend (i8 load %index))
(i32 1)))))
The code that parses the tree ignores the intermediate sign extensions. However,
if there is a sign extension it needs to be on all indexes.
(load (i64 add (i64 copyfromreg %c)
(i64 signextend (add (i8 load %index)
(i8 1))))
vs
(load (i64 add (i64 copyfromreg %c)
(i64 signextend (i32 add (i32 signextend (i8 load %index))
(i32 1)))))
radar://13536387
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178483 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 113 | ||||
-rw-r--r-- | test/CodeGen/X86/MergeConsecutiveStores.ll | 96 |
2 files changed, 184 insertions, 25 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d1f476e644..ff98a04c5b 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7711,16 +7711,82 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { return SDValue(); } -/// Returns the base pointer and an integer offset from that object. -static std::pair<SDValue, int64_t> GetPointerBaseAndOffset(SDValue Ptr) { - if (Ptr->getOpcode() == ISD::ADD && isa<ConstantSDNode>(Ptr->getOperand(1))) { - int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); - SDValue Base = Ptr->getOperand(0); - return std::make_pair(Base, Offset); +/// Helper struct to parse and store a memory address as base + index + offset. +/// We ignore sign extensions when it is safe to do so. +/// The following two expressions are not equivalent. To differentiate we need +/// to store whether there was a sign extension involved in the index +/// computation. +/// (load (i64 add (i64 copyfromreg %c) +/// (i64 signextend (add (i8 load %index) +/// (i8 1)))) +/// vs +/// +/// (load (i64 add (i64 copyfromreg %c) +/// (i64 signextend (i32 add (i32 signextend (i8 load %index)) +/// (i32 1))))) +struct BaseIndexOffset { + SDValue Base; + SDValue Index; + int64_t Offset; + bool IsIndexSignExt; + + BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {} + + BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, + bool IsIndexSignExt) : + Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {} + + bool equalBaseIndex(const BaseIndexOffset &Other) { + return Other.Base == Base && Other.Index == Index && + Other.IsIndexSignExt == IsIndexSignExt; } - return std::make_pair(Ptr, 0); -} + /// Parses tree in Ptr for base, index, offset addresses. + static BaseIndexOffset match(SDValue Ptr) { + bool IsIndexSignExt = false; + + // Just Base or possibly anything else. + if (Ptr->getOpcode() != ISD::ADD) + return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + + // Base + offset. + if (isa<ConstantSDNode>(Ptr->getOperand(1))) { + int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); + return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset, + IsIndexSignExt); + } + + // Look at Base + Index + Offset cases. + SDValue Base = Ptr->getOperand(0); + SDValue IndexOffset = Ptr->getOperand(1); + + // Skip signextends. + if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) { + IndexOffset = IndexOffset->getOperand(0); + IsIndexSignExt = true; + } + + // Either the case of Base + Index (no offset) or something else. + if (IndexOffset->getOpcode() != ISD::ADD) + return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt); + + // Now we have the case of Base + Index + offset. + SDValue Index = IndexOffset->getOperand(0); + SDValue Offset = IndexOffset->getOperand(1); + + if (!isa<ConstantSDNode>(Offset)) + return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + + // Ignore signextends. + if (Index->getOpcode() == ISD::SIGN_EXTEND) { + Index = Index->getOperand(0); + IsIndexSignExt = true; + } else IsIndexSignExt = false; + + int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue(); + return BaseIndexOffset(Base, Index, Off, IsIndexSignExt); + } +}; /// Holds a pointer to an LSBaseSDNode as well as information on where it /// is located in a sequence of memory operations connected by a chain. @@ -7767,16 +7833,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) return false; - // This holds the base pointer and the offset in bytes from the base pointer. - std::pair<SDValue, int64_t> BasePtr = - GetPointerBaseAndOffset(St->getBasePtr()); + // This holds the base pointer, index, and the offset in bytes from the base + // pointer. + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); // We must have a base and an offset. - if (!BasePtr.first.getNode()) + if (!BasePtr.Base.getNode()) return false; // Do not handle stores to undef base pointers. - if (BasePtr.first.getOpcode() == ISD::UNDEF) + if (BasePtr.Base.getOpcode() == ISD::UNDEF) return false; // Save the LoadSDNodes that we find in the chain. @@ -7798,11 +7864,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { break; // Find the base pointer and offset for this memory node. - std::pair<SDValue, int64_t> Ptr = - GetPointerBaseAndOffset(Index->getBasePtr()); + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); // Check that the base pointer is the same as the original one. - if (Ptr.first.getNode() != BasePtr.first.getNode()) + if (!Ptr.equalBaseIndex(BasePtr)) break; // Check that the alignment is the same. @@ -7828,7 +7893,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { break; // We found a potential memory operand to merge. - StoreNodes.push_back(MemOpLink(Index, Ptr.second, Seq++)); + StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); // Find the next memory operand in the chain. If the next operand in the // chain is a store then move up and continue the scan with the next @@ -8025,7 +8090,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Find acceptable loads. Loads need to have the same chain (token factor), // must not be zext, volatile, indexed, and they must be consecutive. - SDValue LdBasePtr; + BaseIndexOffset LdBasePtr; for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue()); @@ -8051,21 +8116,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (Ld->getMemoryVT() != MemVT) break; - std::pair<SDValue, int64_t> LdPtr = - GetPointerBaseAndOffset(Ld->getBasePtr()); - + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr()); // If this is not the first ptr that we check. - if (LdBasePtr.getNode()) { + if (LdBasePtr.Base.getNode()) { // The base ptr must be the same. - if (LdPtr.first != LdBasePtr) + if (!LdPtr.equalBaseIndex(LdBasePtr)) break; } else { // Check that all other base pointers are the same as this one. - LdBasePtr = LdPtr.first; + LdBasePtr = LdPtr; } // We found a potential memory operand to merge. - LoadNodes.push_back(MemOpLink(Ld, LdPtr.second, 0)); + LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0)); } if (LoadNodes.size() < 2) diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll index fbe8879ad6..bb227a0185 100644 --- a/test/CodeGen/X86/MergeConsecutiveStores.ll +++ b/test/CodeGen/X86/MergeConsecutiveStores.ll @@ -337,3 +337,99 @@ block4: ; preds = %4, %.lr.ph ret void } +; Make sure that we merge the consecutive load/store sequence below and use a +; word (16 bit) instead of a byte copy. +; CHECK: MergeLoadStoreBaseIndexOffset +; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]] +; CHECK: movw [[REG]], (%{{.*}}) +define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) { + br label %1 + +; <label>:1 + %.09 = phi i32 [ %n, %0 ], [ %11, %1 ] + %.08 = phi i8* [ %b, %0 ], [ %10, %1 ] + %.0 = phi i64* [ %a, %0 ], [ %2, %1 ] + %2 = getelementptr inbounds i64* %.0, i64 1 + %3 = load i64* %.0, align 1 + %4 = getelementptr inbounds i8* %c, i64 %3 + %5 = load i8* %4, align 1 + %6 = add i64 %3, 1 + %7 = getelementptr inbounds i8* %c, i64 %6 + %8 = load i8* %7, align 1 + store i8 %5, i8* %.08, align 1 + %9 = getelementptr inbounds i8* %.08, i64 1 + store i8 %8, i8* %9, align 1 + %10 = getelementptr inbounds i8* %.08, i64 2 + %11 = add nsw i32 %.09, -1 + %12 = icmp eq i32 %11, 0 + br i1 %12, label %13, label %1 + +; <label>:13 + ret void +} + +; Make sure that we merge the consecutive load/store sequence below and use a +; word (16 bit) instead of a byte copy even if there are intermediate sign +; extensions. +; CHECK: MergeLoadStoreBaseIndexOffsetSext +; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]] +; CHECK: movw [[REG]], (%{{.*}}) +define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) { + br label %1 + +; <label>:1 + %.09 = phi i32 [ %n, %0 ], [ %12, %1 ] + %.08 = phi i8* [ %b, %0 ], [ %11, %1 ] + %.0 = phi i8* [ %a, %0 ], [ %2, %1 ] + %2 = getelementptr inbounds i8* %.0, i64 1 + %3 = load i8* %.0, align 1 + %4 = sext i8 %3 to i64 + %5 = getelementptr inbounds i8* %c, i64 %4 + %6 = load i8* %5, align 1 + %7 = add i64 %4, 1 + %8 = getelementptr inbounds i8* %c, i64 %7 + %9 = load i8* %8, align 1 + store i8 %6, i8* %.08, align 1 + %10 = getelementptr inbounds i8* %.08, i64 1 + store i8 %9, i8* %10, align 1 + %11 = getelementptr inbounds i8* %.08, i64 2 + %12 = add nsw i32 %.09, -1 + %13 = icmp eq i32 %12, 0 + br i1 %13, label %14, label %1 + +; <label>:14 + ret void +} + +; However, we can only merge ignore sign extensions when they are on all memory +; computations; +; CHECK: loadStoreBaseIndexOffsetSextNoSex +; CHECK-NOT: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]] +; CHECK-NOT: movw [[REG]], (%{{.*}}) +define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) { + br label %1 + +; <label>:1 + %.09 = phi i32 [ %n, %0 ], [ %12, %1 ] + %.08 = phi i8* [ %b, %0 ], [ %11, %1 ] + %.0 = phi i8* [ %a, %0 ], [ %2, %1 ] + %2 = getelementptr inbounds i8* %.0, i64 1 + %3 = load i8* %.0, align 1 + %4 = sext i8 %3 to i64 + %5 = getelementptr inbounds i8* %c, i64 %4 + %6 = load i8* %5, align 1 + %7 = add i8 %3, 1 + %wrap.4 = sext i8 %7 to i64 + %8 = getelementptr inbounds i8* %c, i64 %wrap.4 + %9 = load i8* %8, align 1 + store i8 %6, i8* %.08, align 1 + %10 = getelementptr inbounds i8* %.08, i64 1 + store i8 %9, i8* %10, align 1 + %11 = getelementptr inbounds i8* %.08, i64 2 + %12 = add nsw i32 %.09, -1 + %13 = icmp eq i32 %12, 0 + br i1 %13, label %14, label %1 + +; <label>:14 + ret void +} |