diff options
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/R600/AMDILISelDAGToDAG.cpp | 33 | ||||
-rw-r--r-- | lib/Target/R600/R600InstrInfo.cpp | 54 | ||||
-rw-r--r-- | lib/Target/R600/R600InstrInfo.h | 3 | ||||
-rw-r--r-- | lib/Target/R600/R600MachineScheduler.cpp | 75 | ||||
-rw-r--r-- | lib/Target/R600/R600MachineScheduler.h | 3 |
5 files changed, 90 insertions, 78 deletions
diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index 0c7880d232..fa8f62de9c 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -365,17 +365,34 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode, SDValue Operand = Ops[OperandIdx[i] - 1]; switch (Operand.getOpcode()) { case AMDGPUISD::CONST_ADDRESS: { - if (i == 2) - break; SDValue CstOffset; - if (!Operand.getValueType().isVector() && - SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) { - Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32); - Ops[SelIdx[i] - 1] = CstOffset; - return true; + if (Operand.getValueType().isVector() || + !SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) + break; + + // Gather others constants values + std::vector<unsigned> Consts; + for (unsigned j = 0; j < 3; j++) { + int SrcIdx = OperandIdx[j]; + if (SrcIdx < 0) + break; + if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) { + if (Reg->getReg() == AMDGPU::ALU_CONST) { + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]); + Consts.push_back(Cst->getZExtValue()); + } + } } + + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset); + Consts.push_back(Cst->getZExtValue()); + if (!TII->fitsConstReadLimitations(Consts)) + break; + + Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32); + Ops[SelIdx[i] - 1] = CstOffset; + return true; } - break; case ISD::FNEG: if (NegIdx[i] < 0) break; diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index be3318a0b4..08650980fd 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -139,6 +139,60 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const { (TargetFlags & R600_InstFlag::OP3)); } +bool +R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts) + const { + assert (Consts.size() <= 12 && "Too many operands in instructions group"); + unsigned Pair1 = 0, Pair2 = 0; + for (unsigned i = 0, n = Consts.size(); i < n; ++i) { + unsigned ReadConstHalf = Consts[i] & 2; + unsigned ReadConstIndex = Consts[i] & (~3); + unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf; + if (!Pair1) { + Pair1 = ReadHalfConst; + continue; + } + if (Pair1 == ReadHalfConst) + continue; + if (!Pair2) { + Pair2 = ReadHalfConst; + continue; + } + if (Pair2 != ReadHalfConst) + return false; + } + return true; +} + +bool +R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const { + std::vector<unsigned> Consts; + for (unsigned i = 0, n = MIs.size(); i < n; i++) { + const MachineInstr *MI = MIs[i]; + + const R600Operands::Ops OpTable[3][2] = { + {R600Operands::SRC0, R600Operands::SRC0_SEL}, + {R600Operands::SRC1, R600Operands::SRC1_SEL}, + {R600Operands::SRC2, R600Operands::SRC2_SEL}, + }; + + if (!isALUInstr(MI->getOpcode())) + continue; + + for (unsigned j = 0; j < 3; j++) { + int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]); + if (SrcIdx < 0) + break; + if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) { + unsigned Const = MI->getOperand( + getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm(); + Consts.push_back(Const); + } + } + } + return fitsConstReadLimitations(Consts); +} + DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM, const ScheduleDAG *DAG) const { const InstrItineraryData *II = TM->getInstrItineraryData(); diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index efe721c00c..bf9569e659 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -53,6 +53,9 @@ namespace llvm { /// \returns true if this \p Opcode represents an ALU instruction. bool isALUInstr(unsigned Opcode) const; + bool fitsConstReadLimitations(const std::vector<unsigned>&) const; + bool canBundle(const std::vector<MachineInstr *> &) const; + /// \breif Vector instructions are instructions that must fill all /// instruction slots within an instruction group. bool isVector(const MachineInstr &MI) const; diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp index 19baef94c7..9074364bb3 100644 --- a/lib/Target/R600/R600MachineScheduler.cpp +++ b/lib/Target/R600/R600MachineScheduler.cpp @@ -37,7 +37,6 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { CurInstKind = IDOther; CurEmitted = 0; OccupedSlotsMask = 15; - memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate)); InstKindLimit[IDAlu] = 120; // 120 minus 8 for security @@ -288,79 +287,19 @@ int R600SchedStrategy::getInstKind(SUnit* SU) { } } -class ConstPairs { -private: - unsigned XYPair; - unsigned ZWPair; -public: - ConstPairs(unsigned ReadConst[3]) : XYPair(0), ZWPair(0) { - for (unsigned i = 0; i < 3; i++) { - unsigned ReadConstChan = ReadConst[i] & 3; - unsigned ReadConstIndex = ReadConst[i] & (~3); - if (ReadConstChan < 2) { - if (!XYPair) { - XYPair = ReadConstIndex; - } - } else { - if (!ZWPair) { - ZWPair = ReadConstIndex; - } - } - } - } - - bool isCompatibleWith(const ConstPairs& CP) const { - return (!XYPair || !CP.XYPair || CP.XYPair == XYPair) && - (!ZWPair || !CP.ZWPair || CP.ZWPair == ZWPair); - } -}; - -static -const ConstPairs getPairs(const R600InstrInfo *TII, const MachineInstr& MI) { - unsigned ReadConsts[3] = {0, 0, 0}; - R600Operands::Ops OpTable[3][2] = { - {R600Operands::SRC0, R600Operands::SRC0_SEL}, - {R600Operands::SRC1, R600Operands::SRC1_SEL}, - {R600Operands::SRC2, R600Operands::SRC2_SEL}, - }; - - if (!TII->isALUInstr(MI.getOpcode())) - return ConstPairs(ReadConsts); - - for (unsigned i = 0; i < 3; i++) { - int SrcIdx = TII->getOperandIdx(MI.getOpcode(), OpTable[i][0]); - if (SrcIdx < 0) - break; - if (MI.getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) - ReadConsts[i] =MI.getOperand( - TII->getOperandIdx(MI.getOpcode(), OpTable[i][1])).getImm(); - } - return ConstPairs(ReadConsts); -} - -bool -R600SchedStrategy::isBundleable(const MachineInstr& MI) { - const ConstPairs &MIPair = getPairs(TII, MI); - for (unsigned i = 0; i < 4; i++) { - if (!InstructionsGroupCandidate[i]) - continue; - const ConstPairs &IGPair = getPairs(TII, - *InstructionsGroupCandidate[i]->getInstr()); - if (!IGPair.isCompatibleWith(MIPair)) - return false; - } - return true; -} - SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) { if (Q.empty()) return NULL; for (std::set<SUnit *, CompareSUnit>::iterator It = Q.begin(), E = Q.end(); It != E; ++It) { SUnit *SU = *It; - if (isBundleable(*SU->getInstr())) { + InstructionsGroupCandidate.push_back(SU->getInstr()); + if (TII->canBundle(InstructionsGroupCandidate)) { + InstructionsGroupCandidate.pop_back(); Q.erase(It); return SU; + } else { + InstructionsGroupCandidate.pop_back(); } } return NULL; @@ -381,7 +320,7 @@ void R600SchedStrategy::PrepareNextSlot() { DEBUG(dbgs() << "New Slot\n"); assert (OccupedSlotsMask && "Slot wasn't filled"); OccupedSlotsMask = 0; - memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate)); + InstructionsGroupCandidate.clear(); LoadAlu(); } @@ -462,7 +401,7 @@ SUnit* R600SchedStrategy::pickAlu() { SUnit *SU = AttemptFillSlot(Chan); if (SU) { OccupedSlotsMask |= (1 << Chan); - InstructionsGroupCandidate[Chan] = SU; + InstructionsGroupCandidate.push_back(SU->getInstr()); return SU; } } diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h index d74ff1e076..3d0367fd8e 100644 --- a/lib/Target/R600/R600MachineScheduler.h +++ b/lib/Target/R600/R600MachineScheduler.h @@ -98,7 +98,7 @@ public: virtual void releaseBottomNode(SUnit *SU); private: - SUnit *InstructionsGroupCandidate[4]; + std::vector<MachineInstr *> InstructionsGroupCandidate; int getInstKind(SUnit *SU); bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const; @@ -112,7 +112,6 @@ private: void AssignSlot(MachineInstr *MI, unsigned Slot); SUnit* pickAlu(); SUnit* pickOther(int QID); - bool isBundleable(const MachineInstr& MI); void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst); }; |