aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/CodeGen/RegisterCoalescer.cpp37
-rw-r--r--test/CodeGen/ARM/fabss.ll4
-rw-r--r--test/CodeGen/ARM/fp_convert.ll6
3 files changed, 42 insertions, 5 deletions
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index d2087f9bee..c07970d69b 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -55,6 +55,7 @@ STATISTIC(numExtends , "Number of copies extended");
STATISTIC(NumReMats , "Number of instructions re-materialized");
STATISTIC(numPeep , "Number of identity moves eliminated after coalescing");
STATISTIC(numAborts , "Number of times interval joining aborted");
+STATISTIC(NumInflated , "Number of register classes inflated");
static cl::opt<bool>
EnableJoining("join-liveintervals",
@@ -1852,7 +1853,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
// Perform a final pass over the instructions and compute spill weights
// and remove identity moves.
- SmallVector<unsigned, 4> DeadDefs;
+ SmallVector<unsigned, 4> DeadDefs, InflateRegs;
for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end();
mbbi != mbbe; ++mbbi) {
MachineBasicBlock* mbb = mbbi;
@@ -1864,6 +1865,16 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
bool DoDelete = true;
assert(MI->isCopyLike() && "Unrecognized copy instruction");
unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // Collect candidates for register class inflation.
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ RegClassInfo.isProperSubClass(MRI->getRegClass(SrcReg)))
+ InflateRegs.push_back(SrcReg);
+ if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ RegClassInfo.isProperSubClass(MRI->getRegClass(DstReg)))
+ InflateRegs.push_back(DstReg);
+
if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
MI->getNumOperands() > 2)
// Do not delete extract_subreg, insert_subreg of physical
@@ -1905,8 +1916,12 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
unsigned Reg = MO.getReg();
if (!Reg)
continue;
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
DeadDefs.push_back(Reg);
+ // Remat may also enable register class inflation.
+ if (RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)))
+ InflateRegs.push_back(Reg);
+ }
if (MO.isDead())
continue;
if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
@@ -1954,6 +1969,24 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
}
}
+ // After deleting a lot of copies, register classes may be less constrained.
+ // Removing sub-register opreands may alow GR32_ABCD -> GR32 and DPR_VFP2 ->
+ // DPR inflation.
+ array_pod_sort(InflateRegs.begin(), InflateRegs.end());
+ InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
+ InflateRegs.end());
+ DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n");
+ for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
+ unsigned Reg = InflateRegs[i];
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ if (MRI->recomputeRegClass(Reg, *TM)) {
+ DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
+ << MRI->getRegClass(Reg)->getName() << '\n');
+ ++NumInflated;
+ }
+ }
+
DEBUG(dump());
DEBUG(LDV->dump());
if (VerifyCoalescing)
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll
index 51efe51bf1..45c322dce8 100644
--- a/test/CodeGen/ARM/fabss.ll
+++ b/test/CodeGen/ARM/fabss.ll
@@ -22,6 +22,8 @@ declare float @fabsf(float)
; NFP0: vabs.f32 s1, s1
; CORTEXA8: test:
-; CORTEXA8: vabs.f32 d1, d1
+; CORTEXA8: vadd.f32 [[D1:d[0-9]+]]
+; CORTEXA8: vabs.f32 {{d[0-9]+}}, [[D1]]
+
; CORTEXA9: test:
; CORTEXA9: vabs.f32 s{{.}}, s{{.}}
diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll
index 86c06f1ddd..7002cecf36 100644
--- a/test/CodeGen/ARM/fp_convert.ll
+++ b/test/CodeGen/ARM/fp_convert.ll
@@ -7,7 +7,8 @@ define i32 @test1(float %a, float %b) {
; VFP2: test1:
; VFP2: vcvt.s32.f32 s{{.}}, s{{.}}
; NEON: test1:
-; NEON: vcvt.s32.f32 d0, d0
+; NEON: vadd.f32 [[D0:d[0-9]+]]
+; NEON: vcvt.s32.f32 d0, [[D0]]
entry:
%0 = fadd float %a, %b
%1 = fptosi float %0 to i32
@@ -18,7 +19,8 @@ define i32 @test2(float %a, float %b) {
; VFP2: test2:
; VFP2: vcvt.u32.f32 s{{.}}, s{{.}}
; NEON: test2:
-; NEON: vcvt.u32.f32 d0, d0
+; NEON: vadd.f32 [[D0:d[0-9]+]]
+; NEON: vcvt.u32.f32 d0, [[D0]]
entry:
%0 = fadd float %a, %b
%1 = fptoui float %0 to i32