diff options
-rw-r--r-- | include/llvm/Target/TargetRegisterInfo.h | 8 | ||||
-rw-r--r-- | lib/CodeGen/RegAllocLinearScan.cpp | 34 | ||||
-rw-r--r-- | lib/Target/ARM/ARMBaseRegisterInfo.cpp | 23 | ||||
-rw-r--r-- | lib/Target/ARM/ARMBaseRegisterInfo.h | 2 | ||||
-rw-r--r-- | test/CodeGen/ARM/fabss.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/ARM/fadds.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/ARM/fdivs.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/ARM/fmuls.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/ARM/fp_convert.ll | 8 |
9 files changed, 65 insertions, 18 deletions
diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index d06cdc4e8e..0fa3b38561 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -624,6 +624,14 @@ public: return 0; } + /// avoidWriteAfterWrite - Return true if the register allocator should avoid + /// writing a register from RC in two consecutive instructions. + /// This can avoid pipeline stalls on certain architectures. + /// It does cause increased register pressure, though. + virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const { + return false; + } + /// UpdateRegAllocHint - A callback to allow target a chance to update /// register allocation hints when a register is "changed" (e.g. coalesced) /// to another register. e.g. On ARM, some virtual registers should target diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index b8cb5a7c92..b91f312b72 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -67,6 +67,11 @@ TrivCoalesceEnds("trivial-coalesce-ends", cl::desc("Attempt trivial coalescing of interval ends"), cl::init(false), cl::Hidden); +static cl::opt<bool> +AvoidWAWHazard("avoid-waw-hazard", + cl::desc("Avoid write-write hazards for some register classes"), + cl::init(false), cl::Hidden); + static RegisterRegAlloc linearscanRegAlloc("linearscan", "linear scan register allocator", createLinearScanRegisterAllocator); @@ -110,6 +115,7 @@ namespace { if (NumRecentlyUsedRegs > 0) RecentRegs.resize(NumRecentlyUsedRegs, 0); RecentNext = RecentRegs.begin(); + avoidWAW_ = 0; } typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr; @@ -180,6 +186,9 @@ namespace { SmallVector<unsigned, 4> RecentRegs; SmallVector<unsigned, 4>::iterator RecentNext; + // Last write-after-write register written. + unsigned avoidWAW_; + // Record that we just picked this register. void recordRecentlyUsed(unsigned reg) { assert(reg != 0 && "Recently used register is NOREG!"); @@ -227,8 +236,8 @@ namespace { // Determine if we skip this register due to its being recently used. bool isRecentlyUsed(unsigned reg) const { - return std::find(RecentRegs.begin(), RecentRegs.end(), reg) != - RecentRegs.end(); + return reg == avoidWAW_ || + std::find(RecentRegs.begin(), RecentRegs.end(), reg) != RecentRegs.end(); } private: @@ -1116,6 +1125,12 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { active_.push_back(std::make_pair(cur, cur->begin())); handled_.push_back(cur); + // Remember physReg for avoiding a write-after-write hazard in the next + // instruction. + if (AvoidWAWHazard && + tri_->avoidWriteAfterWrite(mri_->getRegClass(cur->reg))) + avoidWAW_ = physReg; + // "Upgrade" the physical register since it has been allocated. UpgradeRegister(physReg); if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) { @@ -1446,7 +1461,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, if (reservedRegs_.test(Reg)) continue; // Skip recently allocated registers. - if (isRegAvail(Reg) && !isRecentlyUsed(Reg)) { + if (isRegAvail(Reg) && (!SkipDGRegs || !isRecentlyUsed(Reg))) { FreeReg = Reg; if (FreeReg < inactiveCounts.size()) FreeRegInactiveCount = inactiveCounts[FreeReg]; @@ -1477,7 +1492,8 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, if (reservedRegs_.test(Reg)) continue; if (isRegAvail(Reg) && Reg < inactiveCounts.size() && - FreeRegInactiveCount < inactiveCounts[Reg] && !isRecentlyUsed(Reg)) { + FreeRegInactiveCount < inactiveCounts[Reg] && + (!SkipDGRegs || !isRecentlyUsed(Reg))) { FreeReg = Reg; FreeRegInactiveCount = inactiveCounts[Reg]; if (FreeRegInactiveCount == MaxInactiveCount) @@ -1528,12 +1544,10 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { return Preference; } - if (!DowngradedRegs.empty()) { - unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, - true); - if (FreeReg) - return FreeReg; - } + unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, + true); + if (FreeReg) + return FreeReg; return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false); } diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 3d1eaf0891..6eb9002df8 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -554,6 +554,29 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, } } +bool +ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { + // CortexA9 has a Write-after-write hazard for NEON registers. + if (!STI.isCortexA9()) + return false; + + switch (RC->getID()) { + case ARM::DPRRegClassID: + case ARM::DPR_8RegClassID: + case ARM::DPR_VFP2RegClassID: + case ARM::QPRRegClassID: + case ARM::QPR_8RegClassID: + case ARM::QPR_VFP2RegClassID: + case ARM::SPRRegClassID: + case ARM::SPR_8RegClassID: + // Avoid reusing S, D, and Q registers. + // Don't increase register pressure for QQ and QQQQ. + return true; + default: + return false; + } +} + bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 0507396f2c..480892ed3e 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -142,6 +142,8 @@ public: void UpdateRegAllocHint(unsigned Reg, unsigned NewReg, MachineFunction &MF) const; + virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const; + bool hasBasePointer(const MachineFunction &MF) const; bool canRealignStack(const MachineFunction &MF) const; diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll index f03282bdab..51efe51bf1 100644 --- a/test/CodeGen/ARM/fabss.ll +++ b/test/CodeGen/ARM/fabss.ll @@ -24,4 +24,4 @@ declare float @fabsf(float) ; CORTEXA8: test: ; CORTEXA8: vabs.f32 d1, d1 ; CORTEXA9: test: -; CORTEXA9: vabs.f32 s1, s1 +; CORTEXA9: vabs.f32 s{{.}}, s{{.}} diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll index 749690e98d..e35103c045 100644 --- a/test/CodeGen/ARM/fadds.ll +++ b/test/CodeGen/ARM/fadds.ll @@ -20,4 +20,4 @@ entry: ; CORTEXA8: test: ; CORTEXA8: vadd.f32 d0, d1, d0 ; CORTEXA9: test: -; CORTEXA9: vadd.f32 s0, s1, s0 +; CORTEXA9: vadd.f32 s{{.}}, s{{.}}, s{{.}} diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll index 0c31495792..31c1ca9405 100644 --- a/test/CodeGen/ARM/fdivs.ll +++ b/test/CodeGen/ARM/fdivs.ll @@ -20,4 +20,4 @@ entry: ; CORTEXA8: test: ; CORTEXA8: vdiv.f32 s0, s1, s0 ; CORTEXA9: test: -; CORTEXA9: vdiv.f32 s0, s1, s0 +; CORTEXA9: vdiv.f32 s{{.}}, s{{.}}, s{{.}} diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll index ef4e3e5281..bc118b8cb2 100644 --- a/test/CodeGen/ARM/fmuls.ll +++ b/test/CodeGen/ARM/fmuls.ll @@ -20,4 +20,4 @@ entry: ; CORTEXA8: test: ; CORTEXA8: vmul.f32 d0, d1, d0 ; CORTEXA9: test: -; CORTEXA9: vmul.f32 s0, s1, s0 +; CORTEXA9: vmul.f32 s{{.}}, s{{.}}, s{{.}} diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll index 1ef9f7f321..86c06f1ddd 100644 --- a/test/CodeGen/ARM/fp_convert.ll +++ b/test/CodeGen/ARM/fp_convert.ll @@ -5,7 +5,7 @@ define i32 @test1(float %a, float %b) { ; VFP2: test1: -; VFP2: vcvt.s32.f32 s0, s0 +; VFP2: vcvt.s32.f32 s{{.}}, s{{.}} ; NEON: test1: ; NEON: vcvt.s32.f32 d0, d0 entry: @@ -16,7 +16,7 @@ entry: define i32 @test2(float %a, float %b) { ; VFP2: test2: -; VFP2: vcvt.u32.f32 s0, s0 +; VFP2: vcvt.u32.f32 s{{.}}, s{{.}} ; NEON: test2: ; NEON: vcvt.u32.f32 d0, d0 entry: @@ -27,7 +27,7 @@ entry: define float @test3(i32 %a, i32 %b) { ; VFP2: test3: -; VFP2: vcvt.f32.u32 s0, s0 +; VFP2: vcvt.f32.u32 s{{.}}, s{{.}} ; NEON: test3: ; NEON: vcvt.f32.u32 d0, d0 entry: @@ -38,7 +38,7 @@ entry: define float @test4(i32 %a, i32 %b) { ; VFP2: test4: -; VFP2: vcvt.f32.s32 s0, s0 +; VFP2: vcvt.f32.s32 s{{.}}, s{{.}} ; NEON: test4: ; NEON: vcvt.f32.s32 d0, d0 entry: |