author    Alexander Kornienko <alexfh@google.com>    2013-04-03 14:07:16 +0000
committer Alexander Kornienko <alexfh@google.com>    2013-04-03 14:07:16 +0000
commit    e133bc868944822bf8961f825d3aa63d6fa48fb7 (patch)
tree      ebbd4a8040181471467a9737d90d94dc6b58b316 /lib/Target/R600
parent    647735c781c5b37061ee03d6e9e6c7dda92218e2 (diff)
parent    080e3c523e87ec68ca1ea5db4cd49816028dd8bd (diff)
Updating branches/google/stable to r178511
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/google/stable@178655 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/R600')
-rw-r--r--  lib/Target/R600/AMDGPU.h | 2
-rw-r--r--  lib/Target/R600/AMDGPUISelLowering.cpp | 2
-rw-r--r--  lib/Target/R600/AMDGPUInstructions.td | 4
-rw-r--r--  lib/Target/R600/AMDGPUMachineFunction.cpp | 22
-rw-r--r--  lib/Target/R600/AMDGPUMachineFunction.h | 29
-rw-r--r--  lib/Target/R600/AMDGPUStructurizeCFG.cpp | 6
-rw-r--r--  lib/Target/R600/AMDGPUTargetMachine.cpp | 2
-rw-r--r--  lib/Target/R600/AMDILISelDAGToDAG.cpp | 33
-rw-r--r--  lib/Target/R600/CMakeLists.txt | 3
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 201
-rw-r--r--  lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp | 4
-rw-r--r--  lib/Target/R600/R600ControlFlowFinalizer.cpp | 264
-rw-r--r--  lib/Target/R600/R600EmitClauseMarkers.cpp | 253
-rw-r--r--  lib/Target/R600/R600ISelLowering.cpp | 12
-rw-r--r--  lib/Target/R600/R600ISelLowering.h | 1
-rw-r--r--  lib/Target/R600/R600InstrInfo.cpp | 54
-rw-r--r--  lib/Target/R600/R600InstrInfo.h | 3
-rw-r--r--  lib/Target/R600/R600Instructions.td | 330
-rw-r--r--  lib/Target/R600/R600MachineFunctionInfo.cpp | 6
-rw-r--r--  lib/Target/R600/R600MachineFunctionInfo.h | 6
-rw-r--r--  lib/Target/R600/R600MachineScheduler.cpp | 75
-rw-r--r--  lib/Target/R600/R600MachineScheduler.h | 3
-rw-r--r--  lib/Target/R600/R600RegisterInfo.td | 63
-rw-r--r--  lib/Target/R600/SIISelLowering.cpp | 116
-rw-r--r--  lib/Target/R600/SIISelLowering.h | 4
-rw-r--r--  lib/Target/R600/SIInsertWaits.cpp | 16
-rw-r--r--  lib/Target/R600/SIInstrInfo.cpp | 42
-rw-r--r--  lib/Target/R600/SIInstrInfo.h | 4
-rw-r--r--  lib/Target/R600/SIInstrInfo.td | 55
-rw-r--r--  lib/Target/R600/SIInstructions.td | 310
-rw-r--r--  lib/Target/R600/SIIntrinsics.td | 12
-rw-r--r--  lib/Target/R600/SILowerControlFlow.cpp | 134
-rw-r--r--  lib/Target/R600/SIMachineFunctionInfo.cpp | 20
-rw-r--r--  lib/Target/R600/SIMachineFunctionInfo.h | 7
-rw-r--r--  lib/Target/R600/SIRegisterInfo.cpp | 5
-rw-r--r--  lib/Target/R600/SIRegisterInfo.h | 3
-rw-r--r--  lib/Target/R600/SIRegisterInfo.td | 14
38 files changed, 1697 insertions, 425 deletions
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index e099a9fc31..0b01433cc9 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -23,6 +23,8 @@ class AMDGPUTargetMachine;
// R600 Passes
FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
+FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
+FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
// SI Passes
FunctionPass *createSIAnnotateControlFlowPass();
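These two declarations follow the usual LLVM pass-factory pattern: the header exposes only a create* function and the pass class itself stays local to its .cpp file. A minimal sketch of what such a factory typically looks like, assuming a MachineFunctionPass named R600EmitClauseMarkers with a (TargetMachine &) constructor in its own translation unit (the definition is not part of this hunk):

  // Sketch of the factory behind the declarations above; the class name and
  // constructor signature are assumed from the declaration, not shown here.
  FunctionPass *llvm::createR600EmitClauseMarkers(TargetMachine &tm) {
    return new R600EmitClauseMarkers(tm);  // ownership passes to the pass manager
  }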
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 5995b6f5e8..a266df535d 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -60,6 +60,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+
setOperationAction(ISD::UDIV, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
setOperationAction(ISD::UREM, MVT::i32, Expand);
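Marking ISD::MUL on MVT::i64 as Expand tells the SelectionDAG legalizer that the target has no native 64-bit multiply, so such nodes are rewritten in terms of operations the target does support. A small, hedged sketch of how backend code can query the action registered above (the helper name and the TargetLowering reference are illustrative):

  #include "llvm/Target/TargetLowering.h"

  // Returns true once the setOperationAction call added above has run.
  static bool mulI64IsExpanded(const llvm::TargetLowering &TLI) {
    return TLI.getOperationAction(llvm::ISD::MUL, llvm::MVT::i64) ==
           llvm::TargetLowering::Expand;
  }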
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index a59c775272..e740348717 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -202,8 +202,8 @@ class Vector2_Build <ValueType vecType, RegisterClass vectorClass,
(vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1)
>;
-class Vector_Build <ValueType vecType, RegisterClass vectorClass,
- ValueType elemType, RegisterClass elemClass> : Pat <
+class Vector4_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
(vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
(elemType elemClass:$z), (elemType elemClass:$w))),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
diff --git a/lib/Target/R600/AMDGPUMachineFunction.cpp b/lib/Target/R600/AMDGPUMachineFunction.cpp
new file mode 100644
index 0000000000..0223ec8e4f
--- /dev/null
+++ b/lib/Target/R600/AMDGPUMachineFunction.cpp
@@ -0,0 +1,22 @@
+#include "AMDGPUMachineFunction.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+
+namespace llvm {
+
+const char *AMDGPUMachineFunction::ShaderTypeAttribute = "ShaderType";
+
+AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
+ MachineFunctionInfo() {
+ AttributeSet Set = MF.getFunction()->getAttributes();
+ Attribute A = Set.getAttribute(AttributeSet::FunctionIndex,
+ ShaderTypeAttribute);
+
+ if (A.isStringAttribute()) {
+ StringRef Str = A.getValueAsString();
+ if (Str.getAsInteger(0, ShaderType))
+ llvm_unreachable("Can't parse shader type!");
+ }
+}
+
+}
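The new AMDGPUMachineFunction pulls the shader type out of a string function attribute, so something upstream has to attach "ShaderType" to the function for the lookup to succeed (in .ll tests this shows up as a string attribute in the kernel's attribute group). A hedged sketch of the producer side, assuming the string-attribute overload of Function::addFnAttr is available; the value "1" is only an example:

  #include "llvm/IR/Function.h"

  // Tag a function so the AMDGPUMachineFunction constructor above can parse it
  // via Str.getAsInteger(0, ShaderType).
  void tagShaderType(llvm::Function *F) {
    F->addFnAttr("ShaderType", "1");  // assumed overload: addFnAttr(Kind, Value)
  }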
diff --git a/lib/Target/R600/AMDGPUMachineFunction.h b/lib/Target/R600/AMDGPUMachineFunction.h
new file mode 100644
index 0000000000..21c8c51dae
--- /dev/null
+++ b/lib/Target/R600/AMDGPUMachineFunction.h
@@ -0,0 +1,29 @@
+//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUMACHINEFUNCTION_H
+#define AMDGPUMACHINEFUNCTION_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class AMDGPUMachineFunction : public MachineFunctionInfo {
+private:
+ static const char *ShaderTypeAttribute;
+public:
+ AMDGPUMachineFunction(const MachineFunction &MF);
+ unsigned ShaderType;
+};
+
+}
+#endif // AMDGPUMACHINEFUNCTION_H
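Passes read this information back through MachineFunction::getInfo, as the new R600ControlFlowFinalizer.cpp further down in this commit does. A short sketch of that consumer side; the helper name is illustrative, and R600MachineFunctionInfo is assumed to derive from AMDGPUMachineFunction after this patch (its own diff is outside this excerpt):

  #include "R600MachineFunctionInfo.h"
  #include "llvm/CodeGen/MachineFunction.h"

  // Read the shader type recorded by the AMDGPUMachineFunction constructor.
  static unsigned getShaderType(llvm::MachineFunction &MF) {
    const llvm::R600MachineFunctionInfo *MFI =
        MF.getInfo<llvm::R600MachineFunctionInfo>();
    return MFI->ShaderType;
  }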
diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
index b723433c16..dea43b874c 100644
--- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp
+++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
@@ -17,6 +17,7 @@
#include "AMDGPU.h"
#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/RegionPass.h"
@@ -40,13 +41,14 @@ typedef SmallVector<BBValuePair, 2> BBValueVector;
typedef SmallPtrSet<BasicBlock *, 8> BBSet;
-typedef DenseMap<PHINode *, BBValueVector> PhiMap;
+typedef MapVector<PHINode *, BBValueVector> PhiMap;
+typedef MapVector<BasicBlock *, BBVector> BB2BBVecMap;
+
typedef DenseMap<DomTreeNode *, unsigned> DTN2UnsignedMap;
typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap;
typedef DenseMap<BasicBlock *, Value *> BBPredicates;
typedef DenseMap<BasicBlock *, BBPredicates> PredMap;
typedef DenseMap<BasicBlock *, BasicBlock*> BB2BBMap;
-typedef DenseMap<BasicBlock *, BBVector> BB2BBVecMap;
// The name for newly created blocks.
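The two container changes swap DenseMap for MapVector exactly where the structurizer iterates while rewriting PHIs and flow: DenseMap iteration order depends on hashing and pointer values, whereas MapVector iterates in insertion order, so the transformed output becomes deterministic from run to run. A self-contained sketch of the property being relied on:

  #include "llvm/ADT/MapVector.h"
  #include <utility>

  // MapVector preserves insertion order on iteration, unlike DenseMap.
  void iterationOrderDemo() {
    llvm::MapVector<int, const char *> M;
    M.insert(std::make_pair(42, "inserted first"));
    M.insert(std::make_pair(7, "inserted second"));
    for (llvm::MapVector<int, const char *>::iterator I = M.begin(),
         E = M.end(); I != E; ++I) {
      // Visits key 42 first, then key 7, independent of any hashing.
    }
  }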
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 0185747544..e7ea876e2a 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -151,7 +151,9 @@ bool AMDGPUPassConfig::addPreEmitPass() {
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
addPass(createAMDGPUCFGPreparationPass(*TM));
addPass(createAMDGPUCFGStructurizerPass(*TM));
+ addPass(createR600EmitClauseMarkers(*TM));
addPass(createR600ExpandSpecialInstrsPass(*TM));
+ addPass(createR600ControlFlowFinalizer(*TM));
addPass(&FinalizeMachineBundlesID);
} else {
addPass(createSILowerControlFlowPass(*TM));
diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp
index 0c7880d232..fa8f62de9c 100644
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -365,17 +365,34 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
SDValue Operand = Ops[OperandIdx[i] - 1];
switch (Operand.getOpcode()) {
case AMDGPUISD::CONST_ADDRESS: {
- if (i == 2)
- break;
SDValue CstOffset;
- if (!Operand.getValueType().isVector() &&
- SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) {
- Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
- Ops[SelIdx[i] - 1] = CstOffset;
- return true;
+ if (Operand.getValueType().isVector() ||
+ !SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset))
+ break;
+
+      // Gather the other constant values
+ std::vector<unsigned> Consts;
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = OperandIdx[j];
+ if (SrcIdx < 0)
+ break;
+ if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
+ if (Reg->getReg() == AMDGPU::ALU_CONST) {
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
+ Consts.push_back(Cst->getZExtValue());
+ }
+ }
}
+
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
+ Consts.push_back(Cst->getZExtValue());
+ if (!TII->fitsConstReadLimitations(Consts))
+ break;
+
+ Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
+ Ops[SelIdx[i] - 1] = CstOffset;
+ return true;
}
- break;
case ISD::FNEG:
if (NegIdx[i] < 0)
break;
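The rewritten folding now collects every constant-buffer select already used by the instruction's sources, appends the new candidate, and only folds if TII->fitsConstReadLimitations(Consts) accepts the whole set, since R600-class hardware can only address a limited amount of constant cache per instruction group. The real helper is added to R600InstrInfo.cpp by this same commit (not shown in this hunk); the sketch below only illustrates the shape of such a check, and the "16 dwords per line / at most 4 lines" figures are assumptions, not values taken from the patch:

  #include "llvm/ADT/SmallSet.h"
  #include <vector>

  // Hedged sketch of a constant-read limitation check; limits are placeholders.
  static bool fitsConstReadLimitationsSketch(const std::vector<unsigned> &Consts) {
    llvm::SmallSet<unsigned, 4> Lines;
    for (unsigned i = 0, e = Consts.size(); i != e; ++i)
      Lines.insert(Consts[i] / 16);  // assumed: 16 dwords per constant-cache line
    return Lines.size() <= 4;        // assumed: at most 4 distinct lines per group
  }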
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
index 63c59e1cb5..8efba5846b 100644
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -27,6 +27,7 @@ add_llvm_target(R600CodeGen
AMDGPUFrameLowering.cpp
AMDGPUIndirectAddressing.cpp
AMDGPUMCInstLower.cpp
+ AMDGPUMachineFunction.cpp
AMDGPUSubtarget.cpp
AMDGPUStructurizeCFG.cpp
AMDGPUTargetMachine.cpp
@@ -34,6 +35,8 @@ add_llvm_target(R600CodeGen
AMDGPUConvertToISA.cpp
AMDGPUInstrInfo.cpp
AMDGPURegisterInfo.cpp
+ R600ControlFlowFinalizer.cpp
+ R600EmitClauseMarkers.cpp
R600ExpandSpecialInstrs.cpp
R600InstrInfo.cpp
R600ISelLowering.cpp
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 4d3d3e7945..b7cdd7c8cd 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -68,8 +68,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo() {
//===--- Dwarf Emission Directives -----------------------------------===//
HasLEB128 = true;
SupportsDebugInformation = true;
- ExceptionsType = ExceptionHandling::None;
- DwarfUsesInlineInfoSection = false;
DwarfSectionOffsetDirective = ".offset";
}
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index d20716000d..6ef4d40934 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -66,8 +66,6 @@ private:
void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx,
raw_ostream &OS) const;
void EmitDst(const MCInst &MI, raw_ostream &OS) const;
- void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
- raw_ostream &OS) const;
void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const;
void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const;
@@ -103,7 +101,8 @@ enum InstrTypes {
INSTR_FC,
INSTR_NATIVE,
INSTR_VTX,
- INSTR_EXPORT
+ INSTR_EXPORT,
+ INSTR_CFALU
};
enum FCInstr {
@@ -140,9 +139,7 @@ MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const {
- if (isTexOp(MI.getOpcode())) {
- EmitTexInstr(MI, Fixups, OS);
- } else if (isFCOp(MI.getOpcode())){
+ if (isFCOp(MI.getOpcode())){
EmitFCInstr(MI, OS);
} else if (MI.getOpcode() == AMDGPU::RETURN ||
MI.getOpcode() == AMDGPU::BUNDLE ||
@@ -150,6 +147,10 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
return;
} else {
switch(MI.getOpcode()) {
+ case AMDGPU::STACK_SIZE: {
+ EmitByte(MI.getOperand(0).getImm(), OS);
+ break;
+ }
case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
@@ -175,6 +176,77 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
Emit(InstWord2, OS);
break;
}
+ case AMDGPU::TEX_LD:
+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+ case AMDGPU::TEX_SAMPLE:
+ case AMDGPU::TEX_SAMPLE_C:
+ case AMDGPU::TEX_SAMPLE_L:
+ case AMDGPU::TEX_SAMPLE_C_L:
+ case AMDGPU::TEX_SAMPLE_LB:
+ case AMDGPU::TEX_SAMPLE_C_LB:
+ case AMDGPU::TEX_SAMPLE_G:
+ case AMDGPU::TEX_SAMPLE_C_G:
+ case AMDGPU::TEX_GET_GRADIENTS_H:
+ case AMDGPU::TEX_GET_GRADIENTS_V:
+ case AMDGPU::TEX_SET_GRADIENTS_H:
+ case AMDGPU::TEX_SET_GRADIENTS_V: {
+ unsigned Opcode = MI.getOpcode();
+ bool HasOffsets = (Opcode == AMDGPU::TEX_LD);
+ unsigned OpOffset = HasOffsets ? 3 : 0;
+ int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
+ int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
+
+ uint32_t SrcSelect[4] = {0, 1, 2, 3};
+ uint32_t Offsets[3] = {0, 0, 0};
+ uint64_t CoordType[4] = {1, 1, 1, 1};
+
+ if (HasOffsets)
+ for (unsigned i = 0; i < 3; i++)
+ Offsets[i] = MI.getOperand(i + 2).getImm();
+
+ if (TextureType == TEXTURE_RECT ||
+ TextureType == TEXTURE_SHADOWRECT) {
+ CoordType[ELEMENT_X] = 0;
+ CoordType[ELEMENT_Y] = 0;
+ }
+
+ if (TextureType == TEXTURE_1D_ARRAY ||
+ TextureType == TEXTURE_SHADOW1D_ARRAY) {
+ if (Opcode == AMDGPU::TEX_SAMPLE_C_L ||
+ Opcode == AMDGPU::TEX_SAMPLE_C_LB) {
+ CoordType[ELEMENT_Y] = 0;
+ } else {
+ CoordType[ELEMENT_Z] = 0;
+ SrcSelect[ELEMENT_Z] = ELEMENT_Y;
+ }
+ } else if (TextureType == TEXTURE_2D_ARRAY ||
+ TextureType == TEXTURE_SHADOW2D_ARRAY) {
+ CoordType[ELEMENT_Z] = 0;
+ }
+
+
+ if ((TextureType == TEXTURE_SHADOW1D ||
+ TextureType == TEXTURE_SHADOW2D ||
+ TextureType == TEXTURE_SHADOWRECT ||
+ TextureType == TEXTURE_SHADOW1D_ARRAY) &&
+ Opcode != AMDGPU::TEX_SAMPLE_C_L &&
+ Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
+ SrcSelect[ELEMENT_W] = ELEMENT_Z;
+ }
+
+ uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) |
+ CoordType[ELEMENT_X] << 60 | CoordType[ELEMENT_Y] << 61 |
+ CoordType[ELEMENT_Z] << 62 | CoordType[ELEMENT_W] << 63;
+ uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 |
+ SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 |
+ SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
+ Offsets[2] << 10;
+
+ EmitByte(INSTR_TEX, OS);
+ Emit(Word01, OS);
+ Emit(Word2, OS);
+ break;
+ }
case AMDGPU::EG_ExportSwz:
case AMDGPU::R600_ExportSwz:
case AMDGPU::EG_ExportBuf:
@@ -184,7 +256,29 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
Emit(Inst, OS);
break;
}
-
+ case AMDGPU::CF_ALU:
+ case AMDGPU::CF_ALU_PUSH_BEFORE: {
+ uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+ EmitByte(INSTR_CFALU, OS);
+ Emit(Inst, OS);
+ break;
+ }
+ case AMDGPU::CF_TC:
+ case AMDGPU::CF_VC:
+ case AMDGPU::CF_CALL_FS:
+ return;
+ case AMDGPU::WHILE_LOOP:
+ case AMDGPU::END_LOOP:
+ case AMDGPU::LOOP_BREAK:
+ case AMDGPU::CF_CONTINUE:
+ case AMDGPU::CF_JUMP:
+ case AMDGPU::CF_ELSE:
+ case AMDGPU::POP: {
+ uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+ EmitByte(INSTR_NATIVE, OS);
+ Emit(Inst, OS);
+ break;
+ }
default:
EmitALUInstr(MI, Fixups, OS);
break;
@@ -334,99 +428,6 @@ void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx,
Emit(InlineConstant.i, OS);
}
-void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI,
- SmallVectorImpl<MCFixup> &Fixups,
- raw_ostream &OS) const {
-
- unsigned Opcode = MI.getOpcode();
- bool hasOffsets = (Opcode == AMDGPU::TEX_LD);
- unsigned OpOffset = hasOffsets ? 3 : 0;
- int64_t Resource = MI.getOperand(OpOffset + 2).getImm();
- int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
- int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
- unsigned srcSelect[4] = {0, 1, 2, 3};
-
- // Emit instruction type
- EmitByte(1, OS);
-
- // Emit instruction
- EmitByte(getBinaryCodeForInstr(MI, Fixups), OS);
-
- // Emit resource id
- EmitByte(Resource, OS);
-
- // Emit source register
- EmitByte(getHWReg(MI.getOperand(1).getReg()), OS);
-
- // XXX: Emit src isRelativeAddress
- EmitByte(0, OS);
-
- // Emit destination register
- EmitByte(getHWReg(MI.getOperand(0).getReg()), OS);
-
- // XXX: Emit dst isRealtiveAddress
- EmitByte(0, OS);
-
- // XXX: Emit dst select
- EmitByte(0, OS); // X
- EmitByte(1, OS); // Y
- EmitByte(2, OS); // Z
- EmitByte(3, OS); // W
-
- // XXX: Emit lod bias
- EmitByte(0, OS);
-
- // XXX: Emit coord types
- unsigned coordType[4] = {1, 1, 1, 1};
-
- if (TextureType == TEXTURE_RECT
- || TextureType == TEXTURE_SHADOWRECT) {
- coordType[ELEMENT_X] = 0;
- coordType[ELEMENT_Y] = 0;
- }
-
- if (TextureType == TEXTURE_1D_ARRAY
- || TextureType == TEXTURE_SHADOW1D_ARRAY) {
- if (Opcode == AMDGPU::TEX_SAMPLE_C_L || Opcode == AMDGPU::TEX_SAMPLE_C_LB) {
- coordType[ELEMENT_Y] = 0;
- } else {
- coordType[ELEMENT_Z] = 0;
- srcSelect[ELEMENT_Z] = ELEMENT_Y;
- }
- } else if (TextureType == TEXTURE_2D_ARRAY
- || TextureType == TEXTURE_SHADOW2D_ARRAY) {
- coordType[ELEMENT_Z] = 0;
- }
-
- for (unsigned i = 0; i < 4; i++) {
- EmitByte(coordType[i], OS);
- }
-
- // XXX: Emit offsets
- if (hasOffsets)
- for (unsigned i = 2; i < 5; i++)
- EmitByte(MI.getOperand(i).getImm()<<1, OS);
- else
- EmitNullBytes(3, OS);
-
- // Emit sampler id
- EmitByte(Sampler, OS);
-
- // XXX:Emit source select
- if ((TextureType == TEXTURE_SHADOW1D
- || TextureType == TEXTURE_SHADOW2D
- || TextureType == TEXTURE_SHADOWRECT
- || TextureType == TEXTURE_SHADOW1D_ARRAY)
- && Opcode != AMDGPU::TEX_SAMPLE_C_L
- && Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
- srcSelect[ELEMENT_W] = ELEMENT_Z;
- }
-
- for (unsigned i = 0; i < 4; i++) {
- EmitByte(srcSelect[i], OS);
- }
-}
-
void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const {
// Emit instruction type
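Throughout the rewritten emitter, each case writes a one-byte instruction-type tag (INSTR_TEX, INSTR_CFALU, INSTR_NATIVE, ...) followed by the raw encoding from getBinaryCodeForInstr. The Emit and EmitByte helpers it relies on are declared earlier in this file but are not part of this diff; a plausible sketch of their behaviour, with little-endian output stated as an assumption rather than read from the patch:

  #include "llvm/Support/raw_ostream.h"
  #include <stdint.h>

  // Assumed behaviour of the EmitByte/Emit helpers used by the cases above.
  static void emitByteSketch(unsigned Byte, llvm::raw_ostream &OS) {
    OS.write((uint8_t)(Byte & 0xff));
  }
  static void emitQWordSketch(uint64_t Value, llvm::raw_ostream &OS) {
    for (unsigned i = 0; i != 8; ++i)
      OS.write((uint8_t)((Value >> (8 * i)) & 0xff));  // least-significant byte first
  }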
diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
index e27abccbe1..5af83209a0 100644
--- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -39,8 +39,6 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
void operator=(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION;
const MCInstrInfo &MCII;
const MCRegisterInfo &MRI;
- const MCSubtargetInfo &STI;
- MCContext &Ctx;
/// \brief Can this operand also contain immediate values?
bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
@@ -51,7 +49,7 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
public:
SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
const MCSubtargetInfo &sti, MCContext &ctx)
- : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { }
+ : MCII(mcii), MRI(mri) { }
~SIMCCodeEmitter() { }
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
new file mode 100644
index 0000000000..bd87d741ec
--- /dev/null
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -0,0 +1,264 @@
+//===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass turns all control flow pseudo instructions into native ones,
+/// computing their address on the fly; it also sets the STACK_SIZE info.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+namespace llvm {
+
+class R600ControlFlowFinalizer : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const R600InstrInfo *TII;
+ unsigned MaxFetchInst;
+
+ bool isFetch(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::TEX_VTX_CONSTBUF:
+ case AMDGPU::TEX_VTX_TEXBUF:
+ case AMDGPU::TEX_LD:
+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+ case AMDGPU::TEX_GET_GRADIENTS_H:
+ case AMDGPU::TEX_GET_GRADIENTS_V:
+ case AMDGPU::TEX_SET_GRADIENTS_H:
+ case AMDGPU::TEX_SET_GRADIENTS_V:
+ case AMDGPU::TEX_SAMPLE:
+ case AMDGPU::TEX_SAMPLE_C:
+ case AMDGPU::TEX_SAMPLE_L:
+ case AMDGPU::TEX_SAMPLE_C_L:
+ case AMDGPU::TEX_SAMPLE_LB:
+ case AMDGPU::TEX_SAMPLE_C_LB:
+ case AMDGPU::TEX_SAMPLE_G:
+ case AMDGPU::TEX_SAMPLE_C_G:
+ case AMDGPU::TXD:
+ case AMDGPU::TXD_SHADOW:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ bool IsTrivialInst(MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::KILL:
+ case AMDGPU::RETURN:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ MachineBasicBlock::iterator
+ MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned CfAddress) const {
+ MachineBasicBlock::iterator ClauseHead = I;
+ unsigned AluInstCount = 0;
+ for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
+ if (IsTrivialInst(I))
+ continue;
+ if (!isFetch(I))
+ break;
+ AluInstCount ++;
+ if (AluInstCount > MaxFetchInst)
+ break;
+ }
+ BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
+ TII->get(AMDGPU::CF_TC))
+ .addImm(CfAddress) // ADDR
+ .addImm(AluInstCount); // COUNT
+ return I;
+ }
+ void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::WHILE_LOOP:
+ MI->getOperand(0).setImm(Addr + 1);
+ break;
+ default:
+ MI->getOperand(0).setImm(Addr);
+ break;
+ }
+ }
+ void CounterPropagateAddr(std::set<MachineInstr *> MIs, unsigned Addr)
+ const {
+ for (std::set<MachineInstr *>::iterator It = MIs.begin(), E = MIs.end();
+ It != E; ++It) {
+ MachineInstr *MI = *It;
+ CounterPropagateAddr(MI, Addr);
+ }
+ }
+
+public:
+ R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
+ TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) {
+ const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
+ MaxFetchInst = 8;
+ else
+ MaxFetchInst = 16;
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ unsigned MaxStack = 0;
+ unsigned CurrentStack = 0;
+ for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
+ ++MB) {
+ MachineBasicBlock &MBB = *MB;
+ unsigned CfCount = 0;
+ std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
+ std::vector<std::pair<unsigned, MachineInstr *> > IfThenElseStack;
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+ if (MFI->ShaderType == 1) {
+ BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
+ TII->get(AMDGPU::CF_CALL_FS));
+ CfCount++;
+ }
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E;) {
+ if (isFetch(I)) {
+ I = MakeFetchClause(MBB, I, 0);
+ CfCount++;
+ continue;
+ }
+
+ MachineBasicBlock::iterator MI = I;
+ I++;
+ switch (MI->getOpcode()) {
+ case AMDGPU::CF_ALU_PUSH_BEFORE:
+ CurrentStack++;
+ MaxStack = std::max(MaxStack, CurrentStack);
+ case AMDGPU::KILLGT:
+ case AMDGPU::CF_ALU:
+ CfCount++;
+ break;
+ case AMDGPU::WHILELOOP: {
+ CurrentStack++;
+ MaxStack = std::max(MaxStack, CurrentStack);
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::WHILE_LOOP))
+ .addImm(0);
+ std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
+ std::set<MachineInstr *>());
+ Pair.second.insert(MIb);
+ LoopStack.push_back(Pair);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::ENDLOOP: {
+ CurrentStack--;
+ std::pair<unsigned, std::set<MachineInstr *> > Pair =
+ LoopStack.back();
+ LoopStack.pop_back();
+ CounterPropagateAddr(Pair.second, CfCount);
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP))
+ .addImm(Pair.first + 1);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::IF_PREDICATE_SET: {
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::CF_JUMP))
+ .addImm(0)
+ .addImm(0);
+ std::pair<unsigned, MachineInstr *> Pair(CfCount, MIb);
+ IfThenElseStack.push_back(Pair);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::ELSE: {
+ std::pair<unsigned, MachineInstr *> Pair = IfThenElseStack.back();
+ IfThenElseStack.pop_back();
+ CounterPropagateAddr(Pair.second, CfCount);
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::CF_ELSE))
+ .addImm(0)