author    | Bill Wendling <isanbard@gmail.com> | 2010-06-23 23:00:16 +0000
committer | Bill Wendling <isanbard@gmail.com> | 2010-06-23 23:00:16 +0000
commit    | 4b722108e2cf8e77157e0879a23789cd44829933 (patch)
tree      | ba80a80ba5136b9294fdf239c0cf081a4dad27ff
parent    | 8ff72b534436a1f4f221ddb21cf0d22c5b09769c (diff)
We are missing opportunities to use ldm. Take code like this:
void t(int *cp0, int *cp1, int *dp, int fmd) {
int c0, c1, d0, d1, d2, d3;
c0 = (*cp0++ & 0xffff) | ((*cp1++ << 16) & 0xffff0000);
c1 = (*cp0++ & 0xffff) | ((*cp1++ << 16) & 0xffff0000);
/* ... */
}
This currently code-gens into something pretty bad. But with this change (analogous to the
X86 back-end), it will use ldm and generate fewer instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106693 91177308-0d34-0410-b5e6-96231b3b80d8
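As a rough illustration of why the example above is such a good ldm candidate (this sketch and its names are purely illustrative, not compiler output or LLVM code): the two reads through cp0, and likewise the two through cp1, go through the same base pointer and differ only by 4 bytes, which is exactly the base-plus-offset pattern the new areLoadsFromSameBasePtr hook below looks for. A small standalone C++ program that models those four loads and reports the mergeable pairs:

// Illustration only -- not LLVM code. Models the four loads in the example
// above as (base, offset) pairs and finds the pairs that share a base and sit
// one 4-byte word apart, i.e. the pairs a single ldm could service instead of
// two separate ldr instructions.
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct LoadRef {
  std::string Base;  // which pointer the load goes through (illustrative tag)
  int64_t Offset;    // byte offset from that base
};

int main() {
  // c0 reads once through cp0 and once through cp1; the post-increments make
  // c1's reads land 4 bytes (sizeof(int) on ARM) past c0's.
  std::vector<LoadRef> Loads = {
      {"cp0", 0}, {"cp1", 0},  // loads feeding c0
      {"cp0", 4}, {"cp1", 4},  // loads feeding c1
  };

  for (size_t I = 0; I != Loads.size(); ++I)
    for (size_t J = I + 1; J != Loads.size(); ++J)
      if (Loads[I].Base == Loads[J].Base &&
          Loads[J].Offset - Loads[I].Offset == 4)
        std::cout << "ldm candidate: [" << Loads[I].Base << "+"
                  << Loads[I].Offset << "] and [" << Loads[J].Base << "+"
                  << Loads[J].Offset << "]\n";
  return 0;
}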
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.cpp       | 101
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.h         |  20
-rw-r--r-- | test/CodeGen/ARM/lsr-on-unrolled-loops.ll |   8
3 files changed, 126 insertions, 3 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 8d3ad8095e..b9bc0b8f22 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1306,6 +1306,107 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
   return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
 }
 
+/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
+/// determine if two loads are loading from the same base address. It should
+/// only return true if the base pointers are the same and the only difference
+/// between the two addresses is the offset. It also returns the offsets by
+/// reference.
+bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+                                               int64_t &Offset1,
+                                               int64_t &Offset2) const {
+  // Don't worry about Thumb: just ARM and Thumb2.
+  if (Subtarget.isThumb1Only()) return false;
+
+  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
+    return false;
+
+  switch (Load1->getMachineOpcode()) {
+  default:
+    return false;
+  case ARM::LDR:
+  case ARM::LDRB:
+  case ARM::LDRD:
+  case ARM::LDRH:
+  case ARM::LDRSB:
+  case ARM::LDRSH:
+  case ARM::VLDRD:
+  case ARM::VLDRS:
+  case ARM::t2LDRi8:
+  case ARM::t2LDRDi8:
+  case ARM::t2LDRSHi8:
+  case ARM::t2LDRi12:
+  case ARM::t2LDRSHi12:
+    break;
+  }
+
+  switch (Load2->getMachineOpcode()) {
+  default:
+    return false;
+  case ARM::LDR:
+  case ARM::LDRB:
+  case ARM::LDRD:
+  case ARM::LDRH:
+  case ARM::LDRSB:
+  case ARM::LDRSH:
+  case ARM::VLDRD:
+  case ARM::VLDRS:
+  case ARM::t2LDRi8:
+  case ARM::t2LDRDi8:
+  case ARM::t2LDRSHi8:
+  case ARM::t2LDRi12:
+  case ARM::t2LDRSHi12:
+    break;
+  }
+
+  // Check if base addresses and chain operands match.
+  if (Load1->getOperand(0) != Load2->getOperand(0) ||
+      Load1->getOperand(4) != Load2->getOperand(4))
+    return false;
+
+  // Index should be Reg0.
+  if (Load1->getOperand(3) != Load2->getOperand(3))
+    return false;
+
+  // Determine the offsets.
+  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
+      isa<ConstantSDNode>(Load2->getOperand(1))) {
+    Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
+    Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
+    return true;
+  }
+
+  return false;
+}
+
+/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
+/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
+/// be scheduled together. On some targets, if two loads are loading from
+/// addresses in the same cache line, it's better if they are scheduled
+/// together. This function takes two integers that represent the load offsets
+/// from the common base address. It returns true if it decides it's desirable
+/// to schedule the two loads together. "NumLoads" is the number of loads that
+/// have already been scheduled after Load1.
+bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+                                               int64_t Offset1, int64_t Offset2,
+                                               unsigned NumLoads) const {
+  // Don't worry about Thumb: just ARM and Thumb2.
+  if (Subtarget.isThumb1Only()) return false;
+
+  assert(Offset2 > Offset1);
+
+  if ((Offset2 - Offset1) / 8 > 64)
+    return false;
+
+  if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
+    return false;  // FIXME: overly conservative?
+
+  // Four loads in a row should be sufficient.
+  if (NumLoads >= 3)
+    return false;
+
+  return true;
+}
+
 bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
                                             const MachineBasicBlock *MBB,
                                             const MachineFunction &MF) const {
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index bc82e145b3..d7d9f525c5 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -320,6 +320,26 @@ public:
   virtual bool produceSameValue(const MachineInstr *MI0,
                                 const MachineInstr *MI1) const;
 
+  /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
+  /// determine if two loads are loading from the same base address. It should
+  /// only return true if the base pointers are the same and the only
+  /// difference between the two addresses is the offset. It also returns the
+  /// offsets by reference.
+  virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+                                       int64_t &Offset1, int64_t &Offset2) const;
+
+  /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
+  /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads
+  /// should be scheduled together. On some targets, if two loads are loading
+  /// from addresses in the same cache line, it's better if they are scheduled
+  /// together. This function takes two integers that represent the load
+  /// offsets from the common base address. It returns true if it decides it's
+  /// desirable to schedule the two loads together. "NumLoads" is the number
+  /// of loads that have already been scheduled after Load1.
+  virtual bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+                                       int64_t Offset1, int64_t Offset2,
+                                       unsigned NumLoads) const;
+
   virtual bool isSchedulingBoundary(const MachineInstr *MI,
                                     const MachineBasicBlock *MBB,
                                     const MachineFunction &MF) const;
diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
index e01617a5f0..25cf1356d6 100644
--- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
+++ b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
@@ -626,9 +626,11 @@ bb24:  ; preds = %bb23
 
 ; LSR should use count-down iteration to avoid requiring the trip count
 ; in a register, and it shouldn't require any reloads here.
-; CHECK: subs r3, #1
-; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: bne.w
+; CHECK: @ %bb24
+; CHECK-NEXT: @ in Loop: Header=BB1_1 Depth=1
+; CHECK-NEXT: sub{{.*}} [[REGISTER:r[0-9]+]], #1
+; CHECK-NEXT: cmp{{.*}} [[REGISTER]], #0
+; CHECK-NEXT: bne.w
 
   %92 = icmp eq i32 %tmp81, %indvar78             ; <i1> [#uses=1]
   %indvar.next79 = add i32 %indvar78, 1           ; <i32> [#uses=1]
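To make the scheduling heuristic above easier to follow outside of LLVM, here is a standalone mirror of the checks shouldScheduleLoadsNear performs. The function name shouldCluster and the SameOpcode flag (standing in for the machine-opcode comparison) are hypothetical stand-ins, not part of any LLVM API; the offsets are assumed to come from a prior areLoadsFromSameBasePtr-style match.

// Standalone mirror of the shouldScheduleLoadsNear heuristic added above.
// Not LLVM code: shouldCluster and SameOpcode are illustrative stand-ins.
#include <cassert>
#include <cstdint>
#include <iostream>

bool shouldCluster(int64_t Offset1, int64_t Offset2, bool SameOpcode,
                   unsigned NumLoads) {
  // The scheduler only asks about loads that were already paired up by an
  // areLoadsFromSameBasePtr-style check, with Offset1 < Offset2.
  assert(Offset2 > Offset1 && "offsets expected in increasing order");

  // Same distance window as the hook: reject pairs more than 64 * 8 = 512
  // bytes apart.
  if ((Offset2 - Offset1) / 8 > 64)
    return false;

  // The hook also insists both loads use the same machine opcode
  // (conservatively, per the FIXME in the patch).
  if (!SameOpcode)
    return false;

  // At most three loads may already be scheduled after the first one, so a
  // cluster never grows beyond four loads.
  if (NumLoads >= 3)
    return false;

  return true;
}

int main() {
  // The two cp0 loads from the commit-message example: offsets 0 and 4,
  // same opcode, nothing clustered yet -> worth scheduling together.
  std::cout << shouldCluster(0, 4, /*SameOpcode=*/true, /*NumLoads=*/0) << "\n";
  // A pair 1 KiB apart falls outside the window -> keep them independent.
  std::cout << shouldCluster(0, 1024, /*SameOpcode=*/true, /*NumLoads=*/0) << "\n";
  return 0;
}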