aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/CodeGen/SimpleRegisterCoalescing.cpp59
-rw-r--r--lib/CodeGen/SimpleRegisterCoalescing.h4
-rw-r--r--test/CodeGen/X86/coalescer-commute5.ll21
3 files changed, 77 insertions, 7 deletions
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index d7c2d962b5..65c8a5b99b 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -310,11 +310,26 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
NewMI->getOperand(OpIdx).setIsKill();
- // Update uses of IntA of the specific Val# with IntB.
bool BHasPHIKill = BValNo->hasPHIKill;
SmallVector<VNInfo*, 4> BDeadValNos;
SmallVector<unsigned, 4> BKills;
std::map<unsigned, unsigned> BExtend;
+
+ // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
+ // A = or A, B
+ // ...
+ // B = A
+ // ...
+ // C = A<kill>
+ // ...
+ // = B
+ //
+ // then do not add kills of A to the newly created B interval.
+ bool Extended = BLR->end > ALR->end && ALR->end != ALR->start;
+ if (Extended)
+ BExtend[ALR->end] = BLR->end;
+
+ // Update uses of IntA of the specific Val# with IntB.
for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg),
UE = mri_->use_end(); UI != UE;) {
MachineOperand &UseMO = UI.getOperand();
@@ -329,8 +344,12 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
UseMO.setReg(NewReg);
if (UseMI == CopyMI)
continue;
- if (UseMO.isKill())
- BKills.push_back(li_->getUseIndex(UseIdx)+1);
+ if (UseMO.isKill()) {
+ if (Extended)
+ UseMO.setIsKill(false);
+ else
+ BKills.push_back(li_->getUseIndex(UseIdx)+1);
+ }
unsigned SrcReg, DstReg;
if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg))
continue;
@@ -347,9 +366,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
JoinedCopies.insert(UseMI);
// If this is a kill but it's going to be removed, the last use
// of the same val# is the new kill.
- if (UseMO.isKill()) {
+ if (UseMO.isKill())
BKills.pop_back();
- }
}
}
@@ -451,6 +469,29 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
}
}
+/// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate
+/// due to live range lengthening as the result of coalescing.
+void SimpleRegisterCoalescing::RemoveUnnecessaryKills(unsigned Reg,
+ LiveInterval &LI) {
+ for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg),
+ UE = mri_->use_end(); UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ if (UseMO.isKill()) {
+ MachineInstr *UseMI = UseMO.getParent();
+ unsigned SReg, DReg;
+ if (!tii_->isMoveInstr(*UseMI, SReg, DReg))
+ continue;
+ unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI));
+ if (JoinedCopies.count(UseMI))
+ continue;
+ LiveInterval::const_iterator UI = LI.FindLiveRangeContaining(UseIdx);
+ assert(UI != LI.end());
+ if (!LI.isKill(UI->valno, UseIdx+1))
+ UseMO.setIsKill(false);
+ }
+ }
+}
+
/// ShortenDeadCopyLiveRange - Shorten a live range as it's artificially
/// extended by a dead copy. Mark the last use (if any) of the val# as kill
/// as ends the live range there. If there isn't another use, then this
@@ -803,6 +844,12 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// Remember to delete the copy instruction.
JoinedCopies.insert(CopyMI);
+ // Some live range has been lengthened due to colaescing, eliminate the
+ // unnecessary kills.
+ RemoveUnnecessaryKills(SrcReg, *ResDstInt);
+ if (TargetRegisterInfo::isVirtualRegister(DstReg))
+ RemoveUnnecessaryKills(DstReg, *ResDstInt);
+
// SrcReg is guarateed to be the register whose live interval that is
// being merged.
li_->removeInterval(SrcReg);
@@ -1481,8 +1528,6 @@ SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End,
}
-/// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate
-/// due to live range lengthening as the result of coalescing.
void SimpleRegisterCoalescing::printRegName(unsigned reg) const {
if (TargetRegisterInfo::isPhysicalRegister(reg))
cerr << tri_->getName(reg);
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h
index 543d4704d1..cf204a538c 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.h
+++ b/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -201,6 +201,10 @@ namespace llvm {
/// subregister.
void UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
+ /// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate
+ /// due to live range lengthening as the result of coalescing.
+ void RemoveUnnecessaryKills(unsigned Reg, LiveInterval &LI);
+
/// ShortenDeadCopyLiveRange - Shorten a live range as it's artificially
/// extended by a dead copy. Mark the last use (if any) of the val# as kill
/// as ends the live range there. If there isn't another use, then this
diff --git a/test/CodeGen/X86/coalescer-commute5.ll b/test/CodeGen/X86/coalescer-commute5.ll
new file mode 100644
index 0000000000..c730ea76e9
--- /dev/null
+++ b/test/CodeGen/X86/coalescer-commute5.ll
@@ -0,0 +1,21 @@
+; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
+
+define i32 @t() {
+entry:
+ br i1 true, label %bb1664, label %bb1656
+bb1656: ; preds = %entry
+ ret i32 0
+bb1664: ; preds = %entry
+ %tmp4297 = bitcast <16 x i8> zeroinitializer to <2 x i64> ; <<2 x i64>> [#uses=2]
+ %tmp4351 = call <16 x i8> @llvm.x86.sse2.pcmpeq.b( <16 x i8> zeroinitializer, <16 x i8> zeroinitializer ) nounwind readnone ; <<16 x i8>> [#uses=0]
+ br i1 false, label %bb5310, label %bb4743
+bb4743: ; preds = %bb1664
+ %tmp4360.not28 = or <2 x i64> zeroinitializer, %tmp4297 ; <<2 x i64>> [#uses=1]
+ br label %bb5310
+bb5310: ; preds = %bb4743, %bb1664
+ %tmp4360.not28.pn = phi <2 x i64> [ %tmp4360.not28, %bb4743 ], [ %tmp4297, %bb1664 ] ; <<2 x i64>> [#uses=1]
+ %tmp4415.not.pn = or <2 x i64> zeroinitializer, %tmp4360.not28.pn ; <<2 x i64>> [#uses=0]
+ ret i32 0
+}
+
+declare <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8>, <16 x i8>) nounwind readnone