diff options
-rw-r--r-- | lib/Target/R600/AMDGPUSubtarget.cpp | 5 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUSubtarget.h | 2 | ||||
-rw-r--r-- | lib/Target/R600/AMDILBase.td | 4 | ||||
-rw-r--r-- | lib/Target/R600/Processors.td | 49 | ||||
-rw-r--r-- | lib/Target/R600/R600ControlFlowFinalizer.cpp | 45 | ||||
-rw-r--r-- | lib/Target/R600/R600Defines.h | 4 | ||||
-rw-r--r-- | lib/Target/R600/R600InstrInfo.cpp | 20 | ||||
-rw-r--r-- | lib/Target/R600/R600InstrInfo.h | 6 | ||||
-rw-r--r-- | lib/Target/R600/R600Instructions.td | 18 | ||||
-rw-r--r-- | test/CodeGen/R600/loop-address.ll | 4 |
10 files changed, 97 insertions, 60 deletions
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp index 0f356a1c3f..a7e1d7b6d5 100644 --- a/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/lib/Target/R600/AMDGPUSubtarget.cpp @@ -33,6 +33,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) : DefaultSize[0] = 64; DefaultSize[1] = 1; DefaultSize[2] = 1; + HasVertexCache = false; ParseSubtargetFeatures(GPU, FS); DevName = GPU; Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit); @@ -53,6 +54,10 @@ AMDGPUSubtarget::is64bit() const { return Is64bit; } bool +AMDGPUSubtarget::hasVertexCache() const { + return HasVertexCache; +} +bool AMDGPUSubtarget::isTargetELF() const { return false; } diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 1973fc6d54..b6501a4562 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -36,6 +36,7 @@ private: bool Is32on64bit; bool DumpCode; bool R600ALUInst; + bool HasVertexCache; InstrItineraryData InstrItins; @@ -48,6 +49,7 @@ public: bool isOverride(AMDGPUDeviceInfo::Caps) const; bool is64bit() const; + bool hasVertexCache() const; // Helper functions to simplify if statements bool isTargetELF() const; diff --git a/lib/Target/R600/AMDILBase.td b/lib/Target/R600/AMDILBase.td index c12cedcf7f..e2211106f3 100644 --- a/lib/Target/R600/AMDILBase.td +++ b/lib/Target/R600/AMDILBase.td @@ -74,6 +74,10 @@ def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst", "false", "Older version of ALU instructions encoding.">; +def FeatureVertexCache : SubtargetFeature<"HasVertexCache", + "HasVertexCache", + "true", + "Specify use of dedicated vertex cache.">; //===----------------------------------------------------------------------===// // Register File, Calling Conv, Instruction Descriptions diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td index c2cc63ac3e..abefba2c73 100644 --- a/lib/Target/R600/Processors.td +++ b/lib/Target/R600/Processors.td @@ -13,23 +13,38 @@ class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features> : Processor<Name, itin, Features>; -def : Proc<"", R600_EG_Itin, [FeatureR600ALUInst]>; -def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>; -def : Proc<"rs880", R600_EG_Itin, [FeatureR600ALUInst]>; -def : Proc<"rv670", R600_EG_Itin, [FeatureR600ALUInst, FeatureFP64]>; -def : Proc<"rv710", R600_EG_Itin, []>; -def : Proc<"rv730", R600_EG_Itin, []>; -def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>; -def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"sumo", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>; -def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>; -def : Proc<"SI", SI_Itin, [Feature64BitPtr, FeatureFP64]>; +def : Proc<"", R600_EG_Itin, + [FeatureR600ALUInst, FeatureVertexCache]>; +def : Proc<"r600", R600_EG_Itin, + [FeatureR600ALUInst , FeatureVertexCache]>; +def : Proc<"rs880", R600_EG_Itin, + [FeatureR600ALUInst]>; +def : Proc<"rv670", R600_EG_Itin, + [FeatureR600ALUInst, FeatureFP64, FeatureVertexCache]>; +def : Proc<"rv710", R600_EG_Itin, + [FeatureVertexCache]>; +def : Proc<"rv730", R600_EG_Itin, + [FeatureVertexCache]>; +def : Proc<"rv770", R600_EG_Itin, + [FeatureFP64, FeatureVertexCache]>; +def : Proc<"cedar", R600_EG_Itin, + [FeatureByteAddress, FeatureImages, FeatureVertexCache]>; +def : Proc<"redwood", R600_EG_Itin, + [FeatureByteAddress, FeatureImages, FeatureVertexCache]>; +def : Proc<"sumo", R600_EG_Itin, + [FeatureByteAddress, FeatureImages]>; +def : Proc<"juniper", R600_EG_Itin, + [FeatureByteAddress, FeatureImages, FeatureVertexCache]>; +def : Proc<"cypress", R600_EG_Itin, + [FeatureByteAddress, FeatureImages, FeatureFP64, FeatureVertexCache]>; +def : Proc<"barts", R600_EG_Itin, + [FeatureByteAddress, FeatureImages, FeatureVertexCache]>; +def : Proc<"turks", R600_EG_Itin, + [FeatureByteAddress, FeatureImages, FeatureVertexCache]>; +def : Proc<"caicos", R600_EG_Itin, + [FeatureByteAddress, FeatureImages]>; +def : Proc<"cayman", R600_EG_Itin, + [FeatureByteAddress, FeatureImages, FeatureFP64]>;def : Proc<"SI", SI_Itin, [Feature64BitPtr, FeatureFP64]>; def : Proc<"tahiti", SI_Itin, [Feature64BitPtr, FeatureFP64]>; def : Proc<"pitcairn", SI_Itin, [Feature64BitPtr, FeatureFP64]>; def : Proc<"verde", SI_Itin, [Feature64BitPtr, FeatureFP64]>; diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index 94736adbab..f9786121ff 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -32,6 +32,7 @@ class R600ControlFlowFinalizer : public MachineFunctionPass { private: enum ControlFlowInstruction { CF_TC, + CF_VC, CF_CALL_FS, CF_WHILE_LOOP, CF_END_LOOP, @@ -48,39 +49,6 @@ private: unsigned MaxFetchInst; const AMDGPUSubtarget &ST; - bool isFetch(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - case AMDGPU::TEX_VTX_CONSTBUF: - case AMDGPU::TEX_VTX_TEXBUF: - case AMDGPU::TEX_LD: - case AMDGPU::TEX_GET_TEXTURE_RESINFO: - case AMDGPU::TEX_GET_GRADIENTS_H: - case AMDGPU::TEX_GET_GRADIENTS_V: - case AMDGPU::TEX_SET_GRADIENTS_H: - case AMDGPU::TEX_SET_GRADIENTS_V: - case AMDGPU::TEX_SAMPLE: - case AMDGPU::TEX_SAMPLE_C: - case AMDGPU::TEX_SAMPLE_L: - case AMDGPU::TEX_SAMPLE_C_L: - case AMDGPU::TEX_SAMPLE_LB: - case AMDGPU::TEX_SAMPLE_C_LB: - case AMDGPU::TEX_SAMPLE_G: - case AMDGPU::TEX_SAMPLE_C_G: - case AMDGPU::TXD: - case AMDGPU::TXD_SHADOW: - case AMDGPU::VTX_READ_GLOBAL_8_eg: - case AMDGPU::VTX_READ_GLOBAL_32_eg: - case AMDGPU::VTX_READ_GLOBAL_128_eg: - case AMDGPU::VTX_READ_PARAM_8_eg: - case AMDGPU::VTX_READ_PARAM_16_eg: - case AMDGPU::VTX_READ_PARAM_32_eg: - case AMDGPU::VTX_READ_PARAM_128_eg: - return true; - default: - return false; - } - } - bool IsTrivialInst(MachineInstr *MI) const { switch (MI->getOpcode()) { case AMDGPU::KILL: @@ -98,6 +66,9 @@ private: case CF_TC: Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600; break; + case CF_VC: + Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600; + break; case CF_CALL_FS: Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600; break; @@ -139,17 +110,19 @@ private: unsigned CfAddress) const { MachineBasicBlock::iterator ClauseHead = I; unsigned AluInstCount = 0; + bool IsTex = TII->usesTextureCache(ClauseHead); for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { if (IsTrivialInst(I)) continue; - if (!isFetch(I)) + if ((IsTex && !TII->usesTextureCache(I)) || + (!IsTex && !TII->usesVertexCache(I))) break; AluInstCount ++; if (AluInstCount > MaxFetchInst) break; } BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), - getHWInstrDesc(CF_TC)) + getHWInstrDesc(IsTex?CF_TC:CF_VC)) .addImm(CfAddress) // ADDR .addImm(AluInstCount); // COUNT return I; @@ -211,7 +184,7 @@ public: } for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) { - if (isFetch(I)) { + if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) { DEBUG(dbgs() << CfCount << ":"; I->dump();); I = MakeFetchClause(MBB, I, 0); CfCount++; diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h index 16cfcf59eb..bdda232521 100644 --- a/lib/Target/R600/R600Defines.h +++ b/lib/Target/R600/R600Defines.h @@ -39,7 +39,9 @@ namespace R600_InstFlag { //FlagOperand bits 7, 8 NATIVE_OPERANDS = (1 << 9), OP1 = (1 << 10), - OP2 = (1 << 11) + OP2 = (1 << 11), + VTX_INST = (1 << 12), + TEX_INST = (1 << 13) }; } diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index b232188a26..1800d200bb 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -29,7 +29,8 @@ using namespace llvm; R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm) : AMDGPUInstrInfo(tm), - RI(tm, *this) + RI(tm, *this), + ST(tm.getSubtarget<AMDGPUSubtarget>()) { } const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const { @@ -139,6 +140,23 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const { (TargetFlags & R600_InstFlag::OP3)); } +bool R600InstrInfo::usesVertexCache(unsigned Opcode) const { + return ST.hasVertexCache() && get(Opcode).TSFlags & R600_InstFlag::VTX_INST; +} + +bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const { + return usesVertexCache(MI->getOpcode()); +} + +bool R600InstrInfo::usesTextureCache(unsigned Opcode) const { + return (!ST.hasVertexCache() && get(Opcode).TSFlags & R600_InstFlag::VTX_INST) || + (get(Opcode).TSFlags & R600_InstFlag::TEX_INST); +} + +bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const { + return usesTextureCache(MI->getOpcode()); +} + bool R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts) const { diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index dbae90013d..2146788604 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -33,6 +33,7 @@ namespace llvm { class R600InstrInfo : public AMDGPUInstrInfo { private: const R600RegisterInfo RI; + const AMDGPUSubtarget &ST; int getBranchInstr(const MachineOperand &op) const; @@ -53,6 +54,11 @@ namespace llvm { /// \returns true if this \p Opcode represents an ALU instruction. bool isALUInstr(unsigned Opcode) const; + bool usesVertexCache(unsigned Opcode) const; + bool usesVertexCache(const MachineInstr *MI) const; + bool usesTextureCache(unsigned Opcode) const; + bool usesTextureCache(const MachineInstr *MI) const; + bool fitsConstReadLimitations(const std::vector<unsigned>&) const; bool canBundle(const std::vector<MachineInstr *> &) const; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index f8e105c6b2..18760cb3e6 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -25,6 +25,8 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern, bit Op1 = 0; bit Op2 = 0; bit HasNativeOperands = 0; + bit VTXInst = 0; + bit TEXInst = 0; let Namespace = "AMDGPU"; let OutOperandList = outs; @@ -43,6 +45,8 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern, let TSFlags{9} = HasNativeOperands; let TSFlags{10} = Op1; let TSFlags{11} = Op2; + let TSFlags{12} = VTXInst; + let TSFlags{13} = TEXInst; } class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> : @@ -478,6 +482,8 @@ class R600_TEX <bits<11> inst, string opName, list<dag> pattern, let COORD_TYPE_Y = 0; let COORD_TYPE_Z = 0; let COORD_TYPE_W = 0; + + let TEXInst = 1; } } // End mayLoad = 1, mayStore = 0, hasSideEffects = 0 @@ -1784,6 +1790,8 @@ class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern> // VTX_WORD3 (Padding) // // Inst{127-96} = 0; + + let VTXInst = 1; } class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern> @@ -2012,15 +2020,17 @@ def TXD: InstR600 < (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget", [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, imm:$textureTarget))], NullALU> { ->; + let TEXInst = 1; +} def TXD_SHADOW: InstR600 < (outs R600_Reg128:$dst), (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget", [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))], NullALU ->; - +> { + let TEXInst = 1; +} } // End isPseudo = 1 } // End usesCustomInserter = 1 @@ -2106,6 +2116,7 @@ def TEX_VTX_CONSTBUF : // VTX_WORD3 (Padding) // // Inst{127-96} = 0; + let VTXInst = 1; } def TEX_VTX_TEXBUF: @@ -2159,6 +2170,7 @@ let Inst{63-32} = Word1; // VTX_WORD3 (Padding) // // Inst{127-96} = 0; + let VTXInst = 1; } diff --git a/test/CodeGen/R600/loop-address.ll b/test/CodeGen/R600/loop-address.ll index dc9295e8e7..a3986b2362 100644 --- a/test/CodeGen/R600/loop-address.ll +++ b/test/CodeGen/R600/loop-address.ll @@ -1,10 +1,10 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: TEX +;CHECK: VTX ;CHECK: ALU_PUSH ;CHECK: JUMP @4 ;CHECK: ELSE @16 -;CHECK: TEX +;CHECK: VTX ;CHECK: LOOP_START_DX10 @15 ;CHECK: LOOP_BREAK @14 ;CHECK: POP @16 |