diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2013-01-18 21:15:50 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2013-01-18 21:15:50 +0000 |
commit | 935a91540b7aa8f29ea48fe2df657db0ce5b7d5d (patch) | |
tree | 4bb7bdd0dd219a108d1241937e852e1ba455691c /lib/Target/R600/SILowerControlFlow.cpp | |
parent | 956f13440a4aa0297606a4412f4aa091d931592a (diff) |
R600: Optimize and cleanup KILL on SI
We shouldn't insert KILL optimization if we don't have a
kill instruction at all.
Patch by: Christian König
Tested-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
Signed-off-by: Christian König <deathsimple@vodafone.de>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172845 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/R600/SILowerControlFlow.cpp')
-rw-r--r-- | lib/Target/R600/SILowerControlFlow.cpp | 127 |
1 files changed, 84 insertions, 43 deletions
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index 3fbe653a49..3780e4051a 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -68,7 +68,10 @@ private: static char ID; const TargetInstrInfo *TII; - void Skip(MachineInstr &MI, MachineOperand &To); + bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To); + + void Skip(MachineInstr &From, MachineOperand &To); + void SkipIfDead(MachineInstr &MI); void If(MachineInstr &MI); void Else(MachineInstr &MI); @@ -78,6 +81,7 @@ private: void Loop(MachineInstr &MI); void EndCf(MachineInstr &MI); + void Kill(MachineInstr &MI); void Branch(MachineInstr &MI); public: @@ -100,22 +104,29 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) { return new SILowerControlFlowPass(tm); } -void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) { +bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From, + MachineBasicBlock *To) { + unsigned NumInstr = 0; - for (MachineBasicBlock *MBB = *From.getParent()->succ_begin(); - NumInstr < SkipThreshold && MBB != To.getMBB() && !MBB->succ_empty(); + for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty(); MBB = *MBB->succ_begin()) { for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); NumInstr < SkipThreshold && I != E; ++I) { if (I->isBundle() || !I->isBundled()) - ++NumInstr; + if (++NumInstr >= SkipThreshold) + return true; } } - if (NumInstr < SkipThreshold) + return false; +} + +void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) { + + if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB())) return; DebugLoc DL = From.getDebugLoc(); @@ -124,6 +135,38 @@ void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) { .addReg(AMDGPU::EXEC); } +void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + if (!shouldSkip(&MBB, &MBB.getParent()->back())) + return; + + MachineBasicBlock::iterator Insert = &MI; + ++Insert; + + // If the exec mask is non-zero, skip the next two instructions + BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) + .addImm(3) + .addReg(AMDGPU::EXEC); + + // Exec mask is zero: Export to NULL target... + BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP)) + .addImm(0) + .addImm(0x09) // V_008DFC_SQ_EXP_NULL + .addImm(0) + .addImm(1) + .addImm(1) + .addReg(AMDGPU::SREG_LIT_0) + .addReg(AMDGPU::SREG_LIT_0) + .addReg(AMDGPU::SREG_LIT_0) + .addReg(AMDGPU::SREG_LIT_0); + + // ... and terminate wavefront + BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)); +} + void SILowerControlFlowPass::If(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MI.getDebugLoc(); @@ -242,8 +285,28 @@ void SILowerControlFlowPass::Branch(MachineInstr &MI) { assert(0); } +void SILowerControlFlowPass::Kill(MachineInstr &MI) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + // Kill is only allowed in pixel shaders + MachineFunction &MF = *MBB.getParent(); + SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); + assert(Info->ShaderType == ShaderType::PIXEL); + + // Clear this pixel from the exec mask if the operand is negative + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC) + .addReg(AMDGPU::SREG_LIT_0) + .addOperand(MI.getOperand(0)); + + MI.eraseFromParent(); +} + bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { - bool HaveCf = false; + + bool HaveKill = false; + unsigned Depth = 0; for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { @@ -257,6 +320,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { switch (MI.getOpcode()) { default: break; case AMDGPU::SI_IF: + ++Depth; If(MI); break; @@ -277,14 +341,26 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { break; case AMDGPU::SI_LOOP: + ++Depth; Loop(MI); break; case AMDGPU::SI_END_CF: - HaveCf = true; + if (--Depth == 0 && HaveKill) { + SkipIfDead(MI); + HaveKill = false; + } EndCf(MI); break; + case AMDGPU::SI_KILL: + if (Depth == 0) + SkipIfDead(MI); + else + HaveKill = true; + Kill(MI); + break; + case AMDGPU::S_BRANCH: Branch(MI); break; @@ -292,40 +368,5 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { } } - // TODO: What is this good for? - unsigned ShaderType = MF.getInfo<SIMachineFunctionInfo>()->ShaderType; - if (HaveCf && ShaderType == ShaderType::PIXEL) { - for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); - BI != BE; ++BI) { - - MachineBasicBlock &MBB = *BI; - if (MBB.succ_empty()) { - - MachineInstr &MI = *MBB.getFirstNonPHI(); - DebugLoc DL = MI.getDebugLoc(); - - // If the exec mask is non-zero, skip the next two instructions - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) - .addImm(3) - .addReg(AMDGPU::EXEC); - - // Exec mask is zero: Export to NULL target... - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::EXP)) - .addImm(0) - .addImm(0x09) // V_008DFC_SQ_EXP_NULL - .addImm(0) - .addImm(1) - .addImm(1) - .addReg(AMDGPU::SREG_LIT_0) - .addReg(AMDGPU::SREG_LIT_0) - .addReg(AMDGPU::SREG_LIT_0) - .addReg(AMDGPU::SREG_LIT_0); - - // ... and terminate wavefront - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ENDPGM)); - } - } - } - return true; } |