| author | Stepan Dyatkovskiy <stpworld@narod.ru> | 2013-04-05 05:52:14 +0000 |
| --- | --- | --- |
| committer | Stepan Dyatkovskiy <stpworld@narod.ru> | 2013-04-05 05:52:14 +0000 |
| commit | 89becbb97423fb608a4dd85ec10c3fde4398d956 (patch) | |
| tree | e9329b5ebf6b5871c8cc2aa2ccf6479e7c7a4b51 /lib/Target/ARM/ARMLoadStoreOptimizer.cpp | |
| parent | 1abaf907b6aff6e468cb838fa40e0ec6cc5ece24 (diff) | |
Fix for PR14824: "Optimization arm_ldst_opt inserts newly generated instruction vldmia at incorrect position".
The patch introduces memory-operand tracking in ARMLoadStoreOpt::LoadStoreMultipleOpti. For each register it keeps the order of load operations as it was before the optimization pass.
It is essentially a deeper version of the fix proposed by Hao: http://llvm.org/bugs/show_bug.cgi?id=14824#c4
But it also tracks conflicts between registers of different register classes (e.g. D2 and S5).
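As an aside for readers unfamiliar with ARM's VFP/NEON register file: the S-, D- and Q-registers overlap (D2 covers S4 and S5, Q1 covers S4 through S7), which is exactly the cross-class conflict the commit message refers to. The standalone C++ sketch below is not part of the patch; the helper names and the ad hoc register encoding are invented for illustration. It mirrors the idea behind the new UnitRegsMap class: break each register into the unit S-registers it covers and treat two registers as conflicting when those sets intersect.

```cpp
// Standalone sketch (not LLVM code): models how unit-register decomposition
// lets overlaps such as D2 vs. S5 be detected with a plain set intersection.
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Return the S-register indices covered by a register name ("S5", "D2", "Q1").
// D0-D15 and Q0-Q7 alias S0-S31; higher D/Q registers have no S-register
// aliases, so they are represented here by opaque unit indices >= 100.
static std::vector<int> unitRegs(const std::string &Reg) {
  char Kind = Reg[0];
  int Num = std::stoi(Reg.substr(1));
  std::vector<int> Units;
  if (Kind == 'S') {
    Units.push_back(Num);
  } else if (Kind == 'D') {
    if (Num < 16) { Units.push_back(2 * Num); Units.push_back(2 * Num + 1); }
    else Units.push_back(100 + Num);                       // D16-D31: no S alias
  } else if (Kind == 'Q') {
    if (Num <= 7)
      for (int i = 0; i < 4; ++i) Units.push_back(4 * Num + i);
    else { Units.push_back(100 + 2 * Num); Units.push_back(100 + 2 * Num + 1); }
  }
  return Units;
}

// Two registers conflict if their unit-register sets intersect.
static bool conflicts(const std::string &A, const std::string &B) {
  std::vector<int> UA = unitRegs(A), UB = unitRegs(B);
  for (int u : UA)
    if (std::find(UB.begin(), UB.end(), u) != UB.end())
      return true;
  return false;
}

int main() {
  std::cout << std::boolalpha
            << conflicts("D2", "S5") << "\n"   // true:  D2 = {S4, S5}
            << conflicts("D2", "S6") << "\n"   // false: S6 belongs to D3
            << conflicts("Q1", "D3") << "\n";  // true:  Q1 = {S4..S7} = {D2, D3}
}
```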
For more details see:
Bug description: http://llvm.org/bugs/show_bug.cgi?id=14824
LLVM Commits discussion:
http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130311/167936.html
http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130318/168688.html
http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130325/169376.html
http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130401/170238.html
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178851 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/ARM/ARMLoadStoreOptimizer.cpp')
-rw-r--r-- | lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 171 |
1 file changed, 161 insertions, 10 deletions
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index e4e683c2a0..0682459e1c 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -87,6 +87,53 @@ namespace {
                     MachineBasicBlock::iterator i)
       : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
   };
+  class UnitRegsMap {
+  public:
+    UnitRegsMap(const TargetRegisterInfo* _TRI) : TRI(_TRI) {}
+    const SmallVector<unsigned, 4>& operator[](unsigned Reg) {
+      DenseMap<unsigned, SmallVector<unsigned, 4> >::iterator found =
+          Cache.find(Reg);
+      if (found != Cache.end())
+        return found->second;
+      else
+        return Cache.insert(std::make_pair(Reg, this->getUnitRegs(Reg)))
+            .first->second;
+    }
+  private:
+    SmallVector<unsigned, 4> getUnitRegs(unsigned Reg) {
+      SmallVector<unsigned, 4> Res;
+
+      const TargetRegisterClass* TRC = TRI->getRegClass(Reg);
+      if (TRC == &ARM::QPRRegClass) {
+        if (Reg > ARM::Q7) {
+          Res.push_back(TRI->getSubReg(Reg, ARM::dsub_0));
+          Res.push_back(TRI->getSubReg(Reg, ARM::dsub_1));
+          return Res;
+        }
+
+        Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0));
+        Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1));
+        Res.push_back(TRI->getSubReg(Reg, ARM::ssub_2));
+        Res.push_back(TRI->getSubReg(Reg, ARM::ssub_3));
+
+        return Res;
+      }
+
+      if (TRC == &ARM::DPRRegClass && Reg < ARM::D15) {
+        Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0));
+        Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1));
+
+        return Res;
+      }
+
+      Res.push_back(Reg);
+
+      return Res;
+
+    }
+    const TargetRegisterInfo* TRI;
+    DenseMap<unsigned, SmallVector<unsigned, 4> > Cache;
+  };
 
   typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
   typedef MemOpQueue::iterator MemOpQueueIter;
@@ -128,6 +175,11 @@ namespace {
                              MachineBasicBlock::iterator MBBI,
                              bool &Advance,
                              MachineBasicBlock::iterator &I);
+    unsigned AddMemOp(MemOpQueue& MemOps,
+                      const MemOpQueueEntry newEntry,
+                      UnitRegsMap& UnitRegsInfo,
+                      SmallSet<unsigned, 4>& UsedUnitRegs,
+                      unsigned At = -1U);
     bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
     bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
   };
@@ -1213,12 +1265,103 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
   return false;
 }
 
+/// AddMemOp - helper for ARMLoadStoreOpt::LoadStoreMultipleOpti.
+/// It adds store mem ops with simple push_back/insert method,
+/// without any additional logic.
+/// For load operation it does the next:
+///   1. Adds new load operation into MemOp collection at "At" position.
+///   2. Removes any "load" operations from MemOps, that changes "Reg" register
+///      contents, prior to "At".
+///   UnitRegsInfo - Map of type Map< Register, UnitRegisters-vector >
+///   UsedUnitRegs - set of unit-registers currently in use.
+///   At - position at which it would added, and prior which the clean-up
+///        should be made (for load operation).
+/// FIXME: The clean-up also should be made for store operations,
+/// but the memory address should be analyzed instead of unit registers.
+unsigned ARMLoadStoreOpt::AddMemOp(MemOpQueue& MemOps,
+                                   const MemOpQueueEntry NewEntry,
+                                   UnitRegsMap& UnitRegsInfo,
+                                   SmallSet<unsigned, 4>& UsedUnitRegs,
+                                   unsigned At) {
+  unsigned Cleaned = 0;
+
+  if (At == -1U) {
+    At = MemOps.size();
+    MemOps.push_back(NewEntry);
+  } else
+    MemOps.insert(&MemOps[At], NewEntry);
+
+  // FIXME:
+  // If operation is not load, leave it as is by now,
+  // So 0 overridden ops would cleaned in this case.
+  if (!NewEntry.MBBI->mayLoad())
+    return 0;
+
+  const SmallVector<unsigned, 4>& NewEntryUnitRegs = UnitRegsInfo[NewEntry.Reg];
+
+  bool FoundOverriddenLoads = false;
+
+  for (unsigned i = 0, e = NewEntryUnitRegs.size(); i != e; ++i)
+    if (UsedUnitRegs.count(NewEntryUnitRegs[i])) {
+      FoundOverriddenLoads = true;
+      break;
+    }
+
+  // If we detect that this register is used by load operations that are
+  // predecessors for the new one, remove them from MemOps then.
+  if (FoundOverriddenLoads) {
+    MemOpQueue UpdatedMemOps;
+
+    // Scan through MemOps entries.
+    for (unsigned i = 0; i != At; ++i) {
+      MemOpQueueEntry& MemOpEntry = MemOps[i];
+
+      // FIXME: Skip non-load operations by now.
+      if (!MemOpEntry.MBBI->mayLoad())
+        continue;
+
+      const SmallVector<unsigned, 4>& MemOpUnitRegs =
+          UnitRegsInfo[MemOpEntry.Reg];
+
+      // Lookup entry that loads contents into register used by new entry.
+      bool ReleaseThisEntry = false;
+      for (unsigned m = 0, em = MemOpUnitRegs.size(); m != em; ++m) {
+        if (std::find(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end(),
                      MemOpUnitRegs[m]) != NewEntryUnitRegs.end()) {
+          ReleaseThisEntry = true;
+          ++Cleaned;
+          break;
+        }
+      }
+
+      if (ReleaseThisEntry) {
+        const SmallVector<unsigned, 4>& RelesedRegs = UnitRegsInfo[MemOpEntry.Reg];
+        for (unsigned r = 0, er = RelesedRegs.size(); r != er; ++r)
+          UsedUnitRegs.erase(RelesedRegs[r]);
+      } else
+        UpdatedMemOps.push_back(MemOpEntry);
+    }
+
+    // Keep anything without changes after At position.
+    for (unsigned i = At, e = MemOps.size(); i != e; ++i)
+      UpdatedMemOps.push_back(MemOps[i]);
+
+    MemOps.swap(UpdatedMemOps);
+  }
+
+  UsedUnitRegs.insert(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end());
+
+  return Cleaned;
+}
+
 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
 /// ops of the same base and incrementing offset into LDM / STM ops.
 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
   unsigned NumMerges = 0;
   unsigned NumMemOps = 0;
   MemOpQueue MemOps;
+  UnitRegsMap UnitRegsInfo(TRI);
+  SmallSet<unsigned, 4> UsedRegUnits;
   unsigned CurrBase = 0;
   int CurrOpc = -1;
   unsigned CurrSize = 0;
@@ -1265,8 +1408,11 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
         CurrSize = Size;
         CurrPred = Pred;
         CurrPredReg = PredReg;
+
         MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
         ++NumMemOps;
+        const SmallVector<unsigned, 4>& EntryUnitRegs = UnitRegsInfo[Reg];
+        UsedRegUnits.insert(EntryUnitRegs.begin(), EntryUnitRegs.end());
         Advance = true;
       } else {
         if (Clobber) {
@@ -1278,20 +1424,24 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
           // No need to match PredReg.
           // Continue adding to the queue.
           if (Offset > MemOps.back().Offset) {
-            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
-                                             Position, MBBI));
-            ++NumMemOps;
+            unsigned OverridesCleaned =
+                AddMemOp(MemOps,
+                         MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI),
+                         UnitRegsInfo, UsedRegUnits) != 0;
+            NumMemOps += 1 - OverridesCleaned;
             Advance = true;
          } else {
-            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
-                 I != E; ++I) {
-              if (Offset < I->Offset) {
-                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
-                                                 Position, MBBI));
-                ++NumMemOps;
+            for (unsigned I = 0; I != NumMemOps; ++I) {
+              if (Offset < MemOps[I].Offset) {
+                MemOpQueueEntry entry(Offset, Reg, isKill, Position, MBBI);
+                unsigned OverridesCleaned =
+                    AddMemOp(MemOps, entry, UnitRegsInfo,
+                             UsedRegUnits, I) != 0;
+                NumMemOps += 1 - OverridesCleaned;
+                Advance = true;
                 break;
-              } else if (Offset == I->Offset) {
+              } else if (Offset == MemOps[I].Offset) {
                 // Collision! This can't be merged!
                 break;
               }
@@ -1362,6 +1512,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
       CurrPredReg = 0;
      if (NumMemOps) {
         MemOps.clear();
+        UsedRegUnits.clear();
         NumMemOps = 0;
       }
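
The new AddMemOp helper above is easier to follow with a toy model of its bookkeeping. The sketch below is a simplification, not the patch's code: it handles loads only, always appends the new entry at the end (the real helper can also insert at a given position At), and the Entry struct, unitRegs() and addLoad() names are invented for the illustration. It demonstrates the behavior documented in the function comment: when a later load overwrites unit registers that an earlier queued load wrote, the earlier entry is evicted from the merge queue so it cannot be folded into an LDM/VLDM that would reorder it past the overwriting load.

```cpp
// Standalone sketch (not LLVM code) of the load-queue clean-up performed by
// the AddMemOp helper: earlier loads whose destination unit registers are
// overwritten by a newly added load are dropped from the merge candidates.
#include <algorithm>
#include <iostream>
#include <set>
#include <string>
#include <vector>

struct Entry { std::string Reg; int Offset; };   // invented, stands in for MemOpQueueEntry

// Same ad hoc decomposition as in the earlier aliasing sketch:
// D2 covers S4/S5, Q1 covers S4..S7, Sx covers itself.
static std::vector<int> unitRegs(const std::string &Reg) {
  char Kind = Reg[0];
  int Num = std::stoi(Reg.substr(1));
  std::vector<int> U;
  if (Kind == 'S') U.push_back(Num);
  else if (Kind == 'D') { U.push_back(2 * Num); U.push_back(2 * Num + 1); }
  else if (Kind == 'Q') for (int i = 0; i < 4; ++i) U.push_back(4 * Num + i);
  return U;
}

// Add a load entry; drop earlier entries whose destination units overlap the
// new one and return how many were removed (cf. "Cleaned" in AddMemOp).
static unsigned addLoad(std::vector<Entry> &Queue, std::set<int> &UsedUnits,
                        const Entry &New) {
  std::vector<int> NewUnits = unitRegs(New.Reg);
  unsigned Cleaned = 0;
  std::vector<Entry> Updated;
  for (const Entry &E : Queue) {
    std::vector<int> EU = unitRegs(E.Reg);
    bool Overlaps = false;
    for (int u : EU)
      if (std::find(NewUnits.begin(), NewUnits.end(), u) != NewUnits.end()) {
        Overlaps = true;
        break;
      }
    if (Overlaps) {                       // earlier load is overridden: drop it
      ++Cleaned;
      for (int u : EU) UsedUnits.erase(u);
    } else
      Updated.push_back(E);
  }
  Updated.push_back(New);
  Queue.swap(Updated);
  UsedUnits.insert(NewUnits.begin(), NewUnits.end());
  return Cleaned;
}

int main() {
  std::vector<Entry> Queue;
  std::set<int> Used;
  addLoad(Queue, Used, {"S5", 0});                      // vldr s5, [rX]
  addLoad(Queue, Used, {"S6", 4});                      // vldr s6, [rX, #4]
  unsigned Cleaned = addLoad(Queue, Used, {"D2", 8});   // overwrites S4/S5
  std::cout << "cleaned " << Cleaned << ", queue size " << Queue.size() << "\n";
  // Prints: cleaned 1, queue size 2  (the stale S5 entry was evicted)
}
```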