diff options
author | Nadav Rotem <nrotem@apple.com> | 2012-09-30 07:17:56 +0000 |
---|---|---|
committer | Nadav Rotem <nrotem@apple.com> | 2012-09-30 07:17:56 +0000 |
commit | 73fab91f2cd5fa3dbe4593506ac4a31aa3faf897 (patch) | |
tree | 91e463fd0509b2545b7bde7aac893736e625a5e6 | |
parent | e5f163a3b98a531c3ff35da0e97e7364c0e04b95 (diff) |
Revert r164910 because it causes failures to several phase2 builds.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@164911 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 254 | ||||
-rw-r--r-- | test/CodeGen/X86/MergeConsecutiveStores.ll | 150 | ||||
-rw-r--r-- | test/CodeGen/X86/loop-strength-reduce-2.ll | 10 | ||||
-rw-r--r-- | test/CodeGen/X86/loop-strength-reduce-3.ll | 5 | ||||
-rw-r--r-- | test/CodeGen/X86/loop-strength-reduce.ll | 5 |
5 files changed, 12 insertions, 412 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index db7140d114..da3293bc8c 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -301,10 +301,6 @@ namespace { /// looking for a better chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); - /// Merge consecutive store operations into a wide store. - /// \return True if some memory operations were changed. - bool MergeConsecutiveStores(StoreSDNode *N); - public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), @@ -7427,250 +7423,6 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { return SDValue(); } -/// Returns the base pointer and an integer offset from that object. -static std::pair<SDValue, int64_t> GetPointerBaseAndOffset(SDValue Ptr) { - if (Ptr->getOpcode() == ISD::ADD && isa<ConstantSDNode>(Ptr->getOperand(1))) { - int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); - SDValue Base = Ptr->getOperand(0); - return std::make_pair(Base, Offset); - } - - return std::make_pair(Ptr, 0); -} - -struct ConsecutiveMemoryChainSorter { - typedef std::pair<LSBaseSDNode*, int64_t> MemLink; - bool operator()(MemLink LHS, MemLink RHS) { - return LHS.second < RHS.second; - } -}; - -bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { - EVT MemVT = St->getMemoryVT(); - int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; - - // Don't handle vectors. - if (MemVT.isVector() || !MemVT.isSimple()) - return false; - - // Perform an early exit check. Do not bother looking at stored values that - // are not constants or loads. - SDValue StoredVal = St->getValue(); - if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) && - !isa<LoadSDNode>(StoredVal)) - return false; - - // Is this a load-to-store or a const-store. - bool IsLoadSrc = isa<LoadSDNode>(StoredVal); - - // Only look at ends of store chains. - SDValue Chain = SDValue(St, 1); - if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) - return false; - - // This holds the base pointer and the offset in bytes from the base pointer. - std::pair<SDValue, int64_t> BasePtr = - GetPointerBaseAndOffset(St->getBasePtr()); - - // We must have a base and an offset. - if (!BasePtr.first.getNode()) - return false; - - // Do not handle stores to undef base pointers. - if (BasePtr.first.getOpcode() == ISD::UNDEF) - return false; - - SmallVector<std::pair<StoreSDNode*, int64_t>, 8> StoreNodes; - // Walk up the chain and look for nodes with offsets from the same - // base pointer. Stop when reaching an instruction with a different kind - // or instruction which has a different base pointer. - StoreSDNode *Index = St; - while (Index) { - // If the chain has more than one use, then we can't reorder the mem ops. - if (Index != St && !SDValue(Index, 1)->hasOneUse()) - break; - - // Find the base pointer and offset for this memory node. - std::pair<SDValue, int64_t> Ptr = - GetPointerBaseAndOffset(Index->getBasePtr()); - - // Check that the base pointer is the same as the original one. - if (Ptr.first.getNode() != BasePtr.first.getNode()) - break; - - // Check that the alignment is the same. - if (Index->getAlignment() != St->getAlignment()) - break; - - // The memory operands must not be volatile. - if (Index->isVolatile() || Index->isIndexed()) - break; - - // No truncation. - if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index)) - if (St->isTruncatingStore()) - break; - - // The stored memory type must be the same. - if (Index->getMemoryVT() != MemVT) - break; - - // We found a potential memory operand to merge. - StoreNodes.push_back(std::make_pair(Index,Ptr.second)); - - // Move up the chain to the next memory operation. - Index = dyn_cast<StoreSDNode>(Index->getChain().getNode()); - } - - // Check if there is anything to merge. - if (StoreNodes.size() < 2) - return false; - - // Remember which node is the earliest node in the chain. - LSBaseSDNode *EarliestOp = StoreNodes.back().first; - - // Sort the memory operands according to their distance from the base pointer. - std::sort(StoreNodes.begin(), StoreNodes.end(), - ConsecutiveMemoryChainSorter()); - - LSBaseSDNode *FirstInChain = StoreNodes[0].first; - - // Scan the memory operations on the chain and find the first non-consecutive - // store memory address. - unsigned LastConsecutiveStore = 0; - int64_t StartAddress = StoreNodes[0].second; - for (unsigned i=1; i<StoreNodes.size(); ++i) { - int64_t CurrAddress = StoreNodes[i].second; - if (CurrAddress - StartAddress != (ElementSizeBytes * i)) - break; - LastConsecutiveStore = i; - } - - // Store the constants into memory as one consecutive store. - if (!IsLoadSrc) { - unsigned LastConst = 0; - for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { - SDValue StoredVal = StoreNodes[i].first->getValue(); - bool IsConst = (isa<ConstantSDNode>(StoredVal) || isa<ConstantFPSDNode>(StoredVal)); - if (!IsConst) - break; - LastConst = i; - } - unsigned NumElem = std::min(LastConsecutiveStore + 1, LastConst + 1); - if (NumElem < 2) - return false; - - EVT JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); - DebugLoc DL = StoreNodes[0].first->getDebugLoc(); - SmallVector<SDValue, 8> Ops; - - for (unsigned i = 0; i < NumElem ; ++i) { - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].first); - Ops.push_back(St->getValue()); - } - - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, - JointMemOpVT, &Ops[0], Ops.size()); - - SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, BV, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), false, false, - FirstInChain->getAlignment()); - - for (unsigned i = 0; i < NumElem ; ++i) { - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].first); - CombineTo(St, NewStore); - } - return true; - } - - // Look for load nodes wich are used by the stored values. - SmallVector<std::pair<LoadSDNode*, int64_t>, 8> LoadNodes; - - // Find acceptible loads. Loads need to have the same chain (token factor), - // must not be zext, volatile, indexed, and they must be consecutive. - SDValue LdBasePtr; - for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { - LoadSDNode *Ld = dyn_cast<LoadSDNode>(StoreNodes[i].first->getValue()); - if (!Ld) break; - - // Loads must only have one use. - if (!Ld->hasNUsesOfValue(1, 0)) - break; - - // Check that the alignment is the same as the stores. - if (Ld->getAlignment() != St->getAlignment()) - break; - - // The memory operands must not be volatile. - if (Ld->isVolatile() || Ld->isIndexed()) - break; - - if (Ld->getExtensionType() != ISD::NON_EXTLOAD) - break; - - // The stored memory type must be the same. - if (Ld->getMemoryVT() != MemVT) - break; - - std::pair<SDValue, int64_t> LdPtr = - GetPointerBaseAndOffset(Ld->getBasePtr()); - - // If this is not the first ptr that we check. - if (LdBasePtr.getNode()) { - // The base ptr must be the same, - if (LdPtr.first != LdBasePtr) - break; - } else { - LdBasePtr = LdPtr.first; - } - - // We found a potential memory operand to merge. - LoadNodes.push_back(std::make_pair(Ld, LdPtr.second)); - } - - if (LoadNodes.size() < 2) - return false; - - // Scan the memory operations on the chain and find the first non-consecutive - // load memory address. - unsigned LastConsecutiveLoad = 0; - StartAddress = LoadNodes[0].second; - for (unsigned i=1; i<LoadNodes.size(); ++i) { - int64_t CurrAddress = LoadNodes[i].second; - if (CurrAddress - StartAddress != (ElementSizeBytes * i)) - break; - LastConsecutiveLoad = i; - } - - unsigned NumElem = - std::min(LastConsecutiveStore + 1, LastConsecutiveLoad + 1); - - EVT JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); - DebugLoc LoadDL = LoadNodes[0].first->getDebugLoc(); - DebugLoc StoreDL = StoreNodes[0].first->getDebugLoc(); - - LoadSDNode *FirstLoad = LoadNodes[0].first; - SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, - FirstLoad->getChain(), - FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), - false, false, false, - FirstLoad->getAlignment()); - - SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), false, false, - FirstInChain->getAlignment()); - - for (unsigned i = 0; i < NumElem ; ++i) { - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].first); - CombineTo(St, NewStore); - } - - return true; -} - SDValue DAGCombiner::visitSTORE(SDNode *N) { StoreSDNode *ST = cast<StoreSDNode>(N); SDValue Chain = ST->getChain(); @@ -7872,12 +7624,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { ST->getAlignment()); } - - // Only perform this optimization before the types are legal, because we - // don't want to generate illegal types in this optimization. - if (!LegalTypes && MergeConsecutiveStores(ST)) - return SDValue(N, 0); - return ReduceLoadOpStoreWidth(N); } diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll deleted file mode 100644 index 435f38c8ad..0000000000 --- a/test/CodeGen/X86/MergeConsecutiveStores.ll +++ /dev/null @@ -1,150 +0,0 @@ -; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.8.0" - -%struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 } - -@a = common global [10000 x %struct.A] zeroinitializer, align 8 - -; Move all of the constants using a single vector store. -; CHECK: merge_const_store -; CHECK: movq %xmm0 -; CHECK: ret -define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp { - %1 = icmp sgt i32 %count, 0 - br i1 %1, label %.lr.ph, label %._crit_edge -.lr.ph: - %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ] - %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ] - %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0 - store i8 1, i8* %2, align 1 - %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1 - store i8 2, i8* %3, align 1 - %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2 - store i8 3, i8* %4, align 1 - %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3 - store i8 4, i8* %5, align 1 - %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4 - store i8 5, i8* %6, align 1 - %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5 - store i8 6, i8* %7, align 1 - %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6 - store i8 7, i8* %8, align 1 - %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7 - store i8 8, i8* %9, align 1 - %10 = add nsw i32 %i.02, 1 - %11 = getelementptr inbounds %struct.A* %.01, i64 1 - %exitcond = icmp eq i32 %10, %count - br i1 %exitcond, label %._crit_edge, label %.lr.ph -._crit_edge: - ret void -} - -; Move the first 4 constants as a single vector. Move the rest as scalars. -; CHECK: merge_nonconst_store -; CHECK: movd %xmm0 -; CHECK: movb -; CHECK: movb -; CHECK: movb -; CHECK: movb -; CHECK: ret -define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp { - %1 = icmp sgt i32 %count, 0 - br i1 %1, label %.lr.ph, label %._crit_edge -.lr.ph: - %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ] - %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ] - %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0 - store i8 1, i8* %2, align 1 - %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1 - store i8 2, i8* %3, align 1 - %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2 - store i8 3, i8* %4, align 1 - %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3 - store i8 4, i8* %5, align 1 - %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4 - store i8 %zz, i8* %6, align 1 ; <----------- Not a const; - %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5 - store i8 6, i8* %7, align 1 - %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6 - store i8 7, i8* %8, align 1 - %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7 - store i8 8, i8* %9, align 1 - %10 = add nsw i32 %i.02, 1 - %11 = getelementptr inbounds %struct.A* %.01, i64 1 - %exitcond = icmp eq i32 %10, %count - br i1 %exitcond, label %._crit_edge, label %.lr.ph -._crit_edge: - ret void -} - - -;CHECK: merge_loads -; load: -;CHECK: movw -; store: -;CHECK: movw -;CHECK: ret -define void @merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp { - %1 = icmp sgt i32 %count, 0 - br i1 %1, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %0 - %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0 - %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1 - br label %4 - -; <label>:4 ; preds = %4, %.lr.ph - %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ] - %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ] - %5 = load i8* %2, align 1 - %6 = load i8* %3, align 1 - %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0 - store i8 %5, i8* %7, align 1 - %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1 - store i8 %6, i8* %8, align 1 - %9 = add nsw i32 %i.02, 1 - %10 = getelementptr inbounds %struct.A* %.01, i64 1 - %exitcond = icmp eq i32 %9, %count - br i1 %exitcond, label %._crit_edge, label %4 - -._crit_edge: ; preds = %4, %0 - ret void -} - -; The loads and the stores are interleved. Can't merge them. -;CHECK: no_merge_loads -;CHECK: movb -;CHECK: movb -;CHECK: movb -;CHECK: movb -;CHECK: ret -define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp { - %1 = icmp sgt i32 %count, 0 - br i1 %1, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %0 - %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0 - %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1 - br label %a4 - -a4: ; preds = %4, %.lr.ph - %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ] - %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ] - %a5 = load i8* %2, align 1 - %a7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0 - store i8 %a5, i8* %a7, align 1 - %a8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1 - %a6 = load i8* %3, align 1 - store i8 %a6, i8* %a8, align 1 - %a9 = add nsw i32 %i.02, 1 - %a10 = getelementptr inbounds %struct.A* %.01, i64 1 - %exitcond = icmp eq i32 %a9, %count - br i1 %exitcond, label %._crit_edge, label %a4 - -._crit_edge: ; preds = %4, %0 - ret void -} - - diff --git a/test/CodeGen/X86/loop-strength-reduce-2.ll b/test/CodeGen/X86/loop-strength-reduce-2.ll index b094fed2f6..b546462b68 100644 --- a/test/CodeGen/X86/loop-strength-reduce-2.ll +++ b/test/CodeGen/X86/loop-strength-reduce-2.ll @@ -1,18 +1,20 @@ -; RUN: llc < %s -march=x86 -mcpu=corei7 -relocation-model=pic | FileCheck %s -check-prefix=PIC -; RUN: llc < %s -march=x86 -mcpu=corei7 -relocation-model=static | FileCheck %s -check-prefix=STATIC +; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC +; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s -check-prefix=STATIC ; ; Make sure the common loop invariant A is hoisted up to preheader, ; since too many registers are needed to subsume it into the addressing modes. ; It's safe to sink A in when it's not pic. ; PIC: align -; PIC: movlpd %xmm0, -4([[REG:%e[a-z]+]]) +; PIC: movl $4, -4([[REG:%e[a-z]+]]) +; PIC: movl $5, ([[REG]]) ; PIC: addl $4, [[REG]] ; PIC: decl {{%e[[a-z]+}} ; PIC: jne ; STATIC: align -; STATIC: movlpd %xmm0, -4(%ecx) +; STATIC: movl $4, -4(%ecx) +; STATIC: movl $5, (%ecx) ; STATIC: addl $4, %ecx ; STATIC: decl %eax ; STATIC: jne diff --git a/test/CodeGen/X86/loop-strength-reduce-3.ll b/test/CodeGen/X86/loop-strength-reduce-3.ll index e2d77f2a6f..b1c9fb9c07 100644 --- a/test/CodeGen/X86/loop-strength-reduce-3.ll +++ b/test/CodeGen/X86/loop-strength-reduce-3.ll @@ -1,7 +1,8 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=corei7 -relocation-model=dynamic-no-pic | FileCheck %s +; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s ; CHECK: align -; CHECK: movlpd %xmm0, -4(%ecx) +; CHECK: movl $4, -4(%ecx) +; CHECK: movl $5, (%ecx) ; CHECK: addl $4, %ecx ; CHECK: decl %eax ; CHECK: jne diff --git a/test/CodeGen/X86/loop-strength-reduce.ll b/test/CodeGen/X86/loop-strength-reduce.ll index d197451eee..42c6ac4983 100644 --- a/test/CodeGen/X86/loop-strength-reduce.ll +++ b/test/CodeGen/X86/loop-strength-reduce.ll @@ -1,7 +1,8 @@ -; RUN: llc < %s -march=x86 -mcpu=corei7 -relocation-model=static | FileCheck %s +; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s ; CHECK: align -; CHECK: movlpd %xmm0, -4(%ecx) +; CHECK: movl $4, -4(%ecx) +; CHECK: movl $5, (%ecx) ; CHECK: addl $4, %ecx ; CHECK: decl %eax ; CHECK: jne |