author     Tom Stellard <thomas.stellard@amd.com>    2013-02-05 17:09:16 +0000
committer  Tom Stellard <thomas.stellard@amd.com>    2013-02-05 17:09:16 +0000
commit     cc38cad53cfebcdfc3b4fbdd924c2a92cd9dacc0 (patch)
tree       d169ff27ae3ba194877a1b6339a35fc53a00b24c
parent     29b15a378045762ce09642ab9dd741ece41f59a3 (diff)
R600: Fold remaining CONST_COPY after expand pseudo inst
Patch by: Vincent Lejeune
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174395 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--  lib/Target/R600/AMDGPUTargetMachine.cpp |   2
-rw-r--r--  lib/Target/R600/R600LowerConstCopy.cpp  | 170
2 files changed, 160 insertions, 12 deletions
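
The folding below is limited by how many distinct constants one instruction group may read: as modeled by the new canFoldInBundle helper, source operands read in the X or Y channel must all come from a single constant pair, and operands read in the Z or W channel from a single (possibly different) pair; a read that would need a third pair is not folded and keeps its MOV. A minimal standalone C++ sketch of that bookkeeping follows (the names are illustrative and not part of this commit), assuming the select-value encoding used by the pass, with the channel in the low two bits and the pair index in the remaining bits:

    // Illustrative model of the per-group constant read budget described by
    // canFoldInBundle below: one constant pair may serve the X/Y read port
    // and one may serve the Z/W read port.
    struct ConstPairs {
      unsigned XYPair; // 0 means "no pair claimed yet", as in the pass
      unsigned ZWPair;
    };

    // ReadConst is assumed to encode the channel in its low two bits and the
    // pair index in the remaining bits, mirroring `ReadConst & 3` and
    // `ReadConst & ~3` in the pass. Returns true if the read fits the group.
    static bool canFoldInGroup(ConstPairs &Used, unsigned ReadConst) {
      unsigned Chan = ReadConst & 3;        // 0 = X, 1 = Y, 2 = Z, 3 = W
      unsigned PairIndex = ReadConst & ~3u; // which constant pair is read
      unsigned &Port = (Chan < 2) ? Used.XYPair : Used.ZWPair;
      if (!Port)
        Port = PairIndex;                   // claim the free port for this pair
      return Port == PairIndex;             // fold only if the pair matches
    }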
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 7b069e7760..2185be3c20 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -136,8 +136,8 @@ bool AMDGPUPassConfig::addPreEmitPass() {
     addPass(createAMDGPUCFGPreparationPass(*TM));
     addPass(createAMDGPUCFGStructurizerPass(*TM));
     addPass(createR600ExpandSpecialInstrsPass(*TM));
-    addPass(createR600LowerConstCopy(*TM));
     addPass(&FinalizeMachineBundlesID);
+    addPass(createR600LowerConstCopy(*TM));
   } else {
     addPass(createSILowerLiteralConstantsPass(*TM));
     addPass(createSILowerControlFlowPass(*TM));
diff --git a/lib/Target/R600/R600LowerConstCopy.cpp b/lib/Target/R600/R600LowerConstCopy.cpp
index 70a2b138f9..46f2aeff4a 100644
--- a/lib/Target/R600/R600LowerConstCopy.cpp
+++ b/lib/Target/R600/R600LowerConstCopy.cpp
@@ -13,7 +13,6 @@
 /// fold them inside vector instruction, like DOT4 or Cube ; ISel emits
 /// ConstCopy instead. This pass (executed after ExpandingSpecialInstr) will try
 /// to fold them if possible or replace them by MOV otherwise.
-/// TODO : Implement the folding part, using Copy Propagation algorithm.
 //
 //===----------------------------------------------------------------------===//
 
@@ -30,6 +29,13 @@ class R600LowerConstCopy : public MachineFunctionPass {
 private:
   static char ID;
   const R600InstrInfo *TII;
+
+  struct ConstPairs {
+    unsigned XYPair;
+    unsigned ZWPair;
+  };
+
+  bool canFoldInBundle(ConstPairs &UsedConst, unsigned ReadConst) const;
 public:
   R600LowerConstCopy(TargetMachine &tm);
   virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -39,27 +45,169 @@ public:
 
 char R600LowerConstCopy::ID = 0;
-
 R600LowerConstCopy::R600LowerConstCopy(TargetMachine &tm) : MachineFunctionPass(ID),
     TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo()))
 {
 }
 
+bool R600LowerConstCopy::canFoldInBundle(ConstPairs &UsedConst,
+    unsigned ReadConst) const {
+  unsigned ReadConstChan = ReadConst & 3;
+  unsigned ReadConstIndex = ReadConst & (~3);
+  if (ReadConstChan < 2) {
+    if (!UsedConst.XYPair) {
+      UsedConst.XYPair = ReadConstIndex;
+    }
+    return UsedConst.XYPair == ReadConstIndex;
+  } else {
+    if (!UsedConst.ZWPair) {
+      UsedConst.ZWPair = ReadConstIndex;
+    }
+    return UsedConst.ZWPair == ReadConstIndex;
+  }
+}
+
+static bool isControlFlow(const MachineInstr &MI) {
+  return (MI.getOpcode() == AMDGPU::IF_PREDICATE_SET) ||
+      (MI.getOpcode() == AMDGPU::ENDIF) ||
+      (MI.getOpcode() == AMDGPU::ELSE) ||
+      (MI.getOpcode() == AMDGPU::WHILELOOP) ||
+      (MI.getOpcode() == AMDGPU::BREAK);
+}
+
 bool R600LowerConstCopy::runOnMachineFunction(MachineFunction &MF) {
+
   for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
                                                   BB != BB_E; ++BB) {
     MachineBasicBlock &MBB = *BB;
-    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-        I != E;) {
-      MachineInstr &MI = *I;
-      I = llvm::next(I);
-      if (MI.getOpcode() != AMDGPU::CONST_COPY)
+    DenseMap<unsigned, MachineInstr *> RegToConstIndex;
+    for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
+        E = MBB.instr_end(); I != E;) {
+
+      if (I->getOpcode() == AMDGPU::CONST_COPY) {
+        MachineInstr &MI = *I;
+        I = llvm::next(I);
+        unsigned DstReg = MI.getOperand(0).getReg();
+        DenseMap<unsigned, MachineInstr *>::iterator SrcMI =
+            RegToConstIndex.find(DstReg);
+        if (SrcMI != RegToConstIndex.end()) {
+          SrcMI->second->eraseFromParent();
+          RegToConstIndex.erase(SrcMI);
+        }
+        MachineInstr *NewMI =
+            TII->buildDefaultInstruction(MBB, &MI, AMDGPU::MOV,
+            MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
+        TII->setImmOperand(NewMI, R600Operands::SRC0_SEL,
+            MI.getOperand(1).getImm());
+        RegToConstIndex[DstReg] = NewMI;
+        MI.eraseFromParent();
         continue;
-      MachineInstr *NewMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::MOV,
-          MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
-      NewMI->getOperand(9).setImm(MI.getOperand(1).getImm());
-      MI.eraseFromParent();
+      }
+
+      std::vector<unsigned> Defs;
+      // We consider all Instructions as bundled because algorithm that handle
+      // const read port limitations inside an IG is still valid with single
+      // instructions.
+      std::vector<MachineInstr *> Bundle;
+
+      if (I->isBundle()) {
+        unsigned BundleSize = I->getBundleSize();
+        for (unsigned i = 0; i < BundleSize; i++) {
+          I = llvm::next(I);
+          Bundle.push_back(I);
+        }
+      } else if (TII->isALUInstr(I->getOpcode())){
+        Bundle.push_back(I);
+      } else if (isControlFlow(*I)) {
+        RegToConstIndex.clear();
+        I = llvm::next(I);
+        continue;
+      } else {
+        MachineInstr &MI = *I;
+        for (MachineInstr::mop_iterator MOp = MI.operands_begin(),
+            MOpE = MI.operands_end(); MOp != MOpE; ++MOp) {
+          MachineOperand &MO = *MOp;
+          if (!MO.isReg())
+            continue;
+          if (MO.isDef()) {
+            Defs.push_back(MO.getReg());
+          } else {
+            // Either a TEX or an Export inst, prevent from erasing def of used
+            // operand
+            RegToConstIndex.erase(MO.getReg());
+            for (MCSubRegIterator SR(MO.getReg(), &TII->getRegisterInfo());
+                SR.isValid(); ++SR) {
+              RegToConstIndex.erase(*SR);
+            }
+          }
+        }
+      }
+
+
+      R600Operands::Ops OpTable[3][2] = {
+        {R600Operands::SRC0, R600Operands::SRC0_SEL},
+        {R600Operands::SRC1, R600Operands::SRC1_SEL},
+        {R600Operands::SRC2, R600Operands::SRC2_SEL},
+      };
+
+      for(std::vector<MachineInstr *>::iterator It = Bundle.begin(),
+          ItE = Bundle.end(); It != ItE; ++It) {
+        MachineInstr *MI = *It;
+        if (TII->isPredicated(MI)) {
+          // We don't want to erase previous assignment
+          RegToConstIndex.erase(MI->getOperand(0).getReg());
+        } else {
+          int WriteIDX = TII->getOperandIdx(MI->getOpcode(), R600Operands::WRITE);
+          if (WriteIDX < 0 || MI->getOperand(WriteIDX).getImm())
+            Defs.push_back(MI->getOperand(0).getReg());
+        }
+      }
+
+      ConstPairs CP = {0,0};
+      for (unsigned SrcOp = 0; SrcOp < 3; SrcOp++) {
+        for(std::vector<MachineInstr *>::iterator It = Bundle.begin(),
+            ItE = Bundle.end(); It != ItE; ++It) {
+          MachineInstr *MI = *It;
+          int SrcIdx = TII->getOperandIdx(MI->getOpcode(), OpTable[SrcOp][0]);
+          if (SrcIdx < 0)
+            continue;
+          MachineOperand &MO = MI->getOperand(SrcIdx);
+          DenseMap<unsigned, MachineInstr *>::iterator SrcMI =
+              RegToConstIndex.find(MO.getReg());
+          if (SrcMI != RegToConstIndex.end()) {
+            MachineInstr *CstMov = SrcMI->second;
+            int ConstMovSel =
+                TII->getOperandIdx(CstMov->getOpcode(), R600Operands::SRC0_SEL);
+            unsigned ConstIndex = CstMov->getOperand(ConstMovSel).getImm();
+            if (canFoldInBundle(CP, ConstIndex)) {
+              TII->setImmOperand(MI, OpTable[SrcOp][1], ConstIndex);
+              MI->getOperand(SrcIdx).setReg(AMDGPU::ALU_CONST);
+            } else {
+              RegToConstIndex.erase(SrcMI);
+            }
+          }
+        }
+      }
+
+      for (std::vector<unsigned>::iterator It = Defs.begin(), ItE = Defs.end();
+          It != ItE; ++It) {
+        DenseMap<unsigned, MachineInstr *>::iterator SrcMI =
+            RegToConstIndex.find(*It);
+        if (SrcMI != RegToConstIndex.end()) {
+          SrcMI->second->eraseFromParent();
+          RegToConstIndex.erase(SrcMI);
+        }
+      }
+      I = llvm::next(I);
+    }
+
+    if (MBB.succ_empty()) {
+      for (DenseMap<unsigned, MachineInstr *>::iterator
+          DI = RegToConstIndex.begin(), DE = RegToConstIndex.end();
+          DI != DE; ++DI) {
+        DI->second->eraseFromParent();
+      }
     }
   }
   return false;
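
Under that model, a group keeps folding reads that land on an already-claimed constant pair, while a read of a third distinct pair does not fit and the pass keeps the MOV for it instead. A hypothetical walk-through of the sketch shown before the diff (the select values 512, 516 and 520 are only example pair bases, not the real hardware encoding):

    ConstPairs CP = {0, 0};
    canFoldInGroup(CP, 513); // true:  channel Y, pair 512 claims the X/Y port
    canFoldInGroup(CP, 512); // true:  channel X, same pair 512
    canFoldInGroup(CP, 518); // true:  channel Z, pair 516 claims the Z/W port
    canFoldInGroup(CP, 521); // false: channel Y, pair 520 conflicts with 512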