diff options
Diffstat (limited to 'lib/Target/NVPTX')
34 files changed, 2543 insertions, 1963 deletions
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt index 47baef6696..7da2fed4cd 100644 --- a/lib/Target/NVPTX/CMakeLists.txt +++ b/lib/Target/NVPTX/CMakeLists.txt @@ -22,6 +22,7 @@ set(NVPTXCodeGen_sources NVPTXAllocaHoisting.cpp NVPTXAsmPrinter.cpp NVPTXUtilities.cpp + NVVMReflect.cpp ) add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources}) diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h index 454583850b..b3e8b5d262 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h @@ -52,25 +52,24 @@ enum PropertyAnnotation { }; const unsigned AnnotationNameLen = 8; // length of each annotation name -const char -PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = { - "maxntidx", // PROPERTY_MAXNTID_X - "maxntidy", // PROPERTY_MAXNTID_Y - "maxntidz", // PROPERTY_MAXNTID_Z - "reqntidx", // PROPERTY_REQNTID_X - "reqntidy", // PROPERTY_REQNTID_Y - "reqntidz", // PROPERTY_REQNTID_Z - "minctasm", // PROPERTY_MINNCTAPERSM - "texture", // PROPERTY_ISTEXTURE - "surface", // PROPERTY_ISSURFACE - "sampler", // PROPERTY_ISSAMPLER - "rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM - "wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM - "kernel", // PROPERTY_ISKERNEL_FUNCTION - "align", // PROPERTY_ALIGN +const char PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = { + "maxntidx", // PROPERTY_MAXNTID_X + "maxntidy", // PROPERTY_MAXNTID_Y + "maxntidz", // PROPERTY_MAXNTID_Z + "reqntidx", // PROPERTY_REQNTID_X + "reqntidy", // PROPERTY_REQNTID_Y + "reqntidz", // PROPERTY_REQNTID_Z + "minctasm", // PROPERTY_MINNCTAPERSM + "texture", // PROPERTY_ISTEXTURE + "surface", // PROPERTY_ISSURFACE + "sampler", // PROPERTY_ISSAMPLER + "rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM + "wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM + "kernel", // PROPERTY_ISKERNEL_FUNCTION + "align", // PROPERTY_ALIGN - // last property - "proplast", // 
PROPERTY_LAST + // last property + "proplast", // PROPERTY_LAST }; // name of named metadata used for global annotations @@ -80,9 +79,8 @@ PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = { // compiling those .cpp files, hence __attribute__((unused)). __attribute__((unused)) #endif -static const char* NamedMDForAnnotations = "nvvm.annotations"; + static const char *NamedMDForAnnotations = "nvvm.annotations"; } - #endif diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp index 619181994a..459cd96cb0 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -23,10 +23,9 @@ bool CompileForDebugging; // compile for debugging static cl::opt<bool, true> Debug("debug-compile", cl::desc("Compile for debugging"), cl::Hidden, - cl::location(CompileForDebugging), - cl::init(false)); + cl::location(CompileForDebugging), cl::init(false)); -void NVPTXMCAsmInfo::anchor() { } +void NVPTXMCAsmInfo::anchor() {} NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) { Triple TheTriple(TT); @@ -55,7 +54,7 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) { Data32bitsDirective = " .b32 "; Data64bitsDirective = " .b64 "; PrivateGlobalPrefix = ""; - ZeroDirective = " .b8"; + ZeroDirective = " .b8"; AsciiDirective = " .b8"; AscizDirective = " .b8"; diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp index 44aa01ca6e..ccd29705df 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp @@ -28,7 +28,6 @@ #define GET_REGINFO_MC_DESC #include "NVPTXGenRegisterInfo.inc" - using namespace llvm; static MCInstrInfo *createNVPTXMCInstrInfo() { @@ -44,22 +43,20 @@ static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) { return X; } -static MCSubtargetInfo 
*createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU, - StringRef FS) { +static MCSubtargetInfo * +createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { MCSubtargetInfo *X = new MCSubtargetInfo(); InitNVPTXMCSubtargetInfo(X, TT, CPU, FS); return X; } -static MCCodeGenInfo *createNVPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) { +static MCCodeGenInfo *createNVPTXMCCodeGenInfo( + StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); X->InitMCCodeGenInfo(RM, CM, OL); return X; } - // Force static initialization. extern "C" void LLVMInitializeNVPTXTargetMC() { // Register the MC asm info. diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h index b5684883fc..d6c79b5110 100644 --- a/lib/Target/NVPTX/ManagedStringPool.h +++ b/lib/Target/NVPTX/ManagedStringPool.h @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// - #ifndef LLVM_SUPPORT_MANAGED_STRING_H #define LLVM_SUPPORT_MANAGED_STRING_H diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index b46ea881c4..6a53a443bf 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -41,18 +41,24 @@ enum CondCodes { inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) { switch (CC) { - case NVPTXCC::NE: return "ne"; - case NVPTXCC::EQ: return "eq"; - case NVPTXCC::LT: return "lt"; - case NVPTXCC::LE: return "le"; - case NVPTXCC::GT: return "gt"; - case NVPTXCC::GE: return "ge"; + case NVPTXCC::NE: + return "ne"; + case NVPTXCC::EQ: + return "eq"; + case NVPTXCC::LT: + return "lt"; + case NVPTXCC::LE: + return "le"; + case NVPTXCC::GT: + return "gt"; + case NVPTXCC::GE: + return "ge"; } llvm_unreachable("Unknown condition code"); } -FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM, - llvm::CodeGenOpt::Level OptLevel); +FunctionPass * 
+createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel); FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &); FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &); FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &); @@ -62,8 +68,7 @@ bool isImageOrSamplerVal(const Value *, const Module *); extern Target TheNVPTXTarget32; extern Target TheNVPTXTarget64; -namespace NVPTX -{ +namespace NVPTX { enum DrvInterface { NVCL, CUDA, @@ -102,7 +107,7 @@ enum LoadStore { }; namespace PTXLdStInstCode { -enum AddressSpace{ +enum AddressSpace { GENERIC = 0, GLOBAL = 1, CONSTANT = 2, diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td index 7aee3595c6..d78b4e81a3 100644 --- a/lib/Target/NVPTX/NVPTX.td +++ b/lib/Target/NVPTX/NVPTX.td @@ -26,14 +26,6 @@ include "NVPTXInstrInfo.td" //===----------------------------------------------------------------------===// // SM Versions -def SM10 : SubtargetFeature<"sm_10", "SmVersion", "10", - "Target SM 1.0">; -def SM11 : SubtargetFeature<"sm_11", "SmVersion", "11", - "Target SM 1.1">; -def SM12 : SubtargetFeature<"sm_12", "SmVersion", "12", - "Target SM 1.2">; -def SM13 : SubtargetFeature<"sm_13", "SmVersion", "13", - "Target SM 1.3">; def SM20 : SubtargetFeature<"sm_20", "SmVersion", "20", "Target SM 2.0">; def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21", @@ -56,10 +48,6 @@ def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31", class Proc<string Name, list<SubtargetFeature> Features> : Processor<Name, NoItineraries, Features>; -def : Proc<"sm_10", [SM10]>; -def : Proc<"sm_11", [SM11]>; -def : Proc<"sm_12", [SM12]>; -def : Proc<"sm_13", [SM13]>; def : Proc<"sm_20", [SM20]>; def : Proc<"sm_21", [SM21]>; def : Proc<"sm_30", [SM30]>; diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp index 60f52a46da..0f792ec682 100644 --- a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp +++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp @@ -19,9 +19,9 @@ 
namespace llvm { bool NVPTXAllocaHoisting::runOnFunction(Function &function) { - bool functionModified = false; - Function::iterator I = function.begin(); - TerminatorInst *firstTerminatorInst = (I++)->getTerminator(); + bool functionModified = false; + Function::iterator I = function.begin(); + TerminatorInst *firstTerminatorInst = (I++)->getTerminator(); for (Function::iterator E = function.end(); I != E; ++I) { for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) { @@ -37,12 +37,10 @@ bool NVPTXAllocaHoisting::runOnFunction(Function &function) { } char NVPTXAllocaHoisting::ID = 1; -RegisterPass<NVPTXAllocaHoisting> X("alloca-hoisting", - "Hoisting alloca instructions in non-entry " - "blocks to the entry block"); +RegisterPass<NVPTXAllocaHoisting> +X("alloca-hoisting", "Hoisting alloca instructions in non-entry " + "blocks to the entry block"); -FunctionPass *createAllocaHoisting() { - return new NVPTXAllocaHoisting(); -} +FunctionPass *createAllocaHoisting() { return new NVPTXAllocaHoisting(); } } // end namespace llvm diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 0115e1f5d3..ce5d78afa3 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -47,7 +47,6 @@ #include <sstream> using namespace llvm; - #include "NVPTXGenAsmWriter.inc" bool RegAllocNilUsed = true; @@ -59,21 +58,17 @@ EmitLineNumbers("nvptx-emit-line-numbers", cl::desc("NVPTX Specific: Emit Line numbers even without -G"), cl::init(true)); -namespace llvm { -bool InterleaveSrcInPtx = false; -} - -static cl::opt<bool, true>InterleaveSrc("nvptx-emit-src", - cl::ZeroOrMore, - cl::desc("NVPTX Specific: Emit source line in ptx file"), - cl::location(llvm::InterleaveSrcInPtx)); +namespace llvm { bool InterleaveSrcInPtx = false; } +static cl::opt<bool, true> +InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, + cl::desc("NVPTX Specific: Emit source line in ptx file"), + 
cl::location(llvm::InterleaveSrcInPtx)); namespace { /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V /// depends. -void DiscoverDependentGlobals(Value *V, - DenseSet<GlobalVariable*> &Globals) { +void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) { if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) Globals.insert(GV); else { @@ -88,12 +83,12 @@ void DiscoverDependentGlobals(Value *V, /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable /// instances to be emitted, but only after any dependents have been added /// first. -void VisitGlobalVariableForEmission(GlobalVariable *GV, - SmallVectorImpl<GlobalVariable*> &Order, - DenseSet<GlobalVariable*> &Visited, - DenseSet<GlobalVariable*> &Visiting) { +void VisitGlobalVariableForEmission( + GlobalVariable *GV, SmallVectorImpl<GlobalVariable *> &Order, + DenseSet<GlobalVariable *> &Visited, DenseSet<GlobalVariable *> &Visiting) { // Have we already visited this one? - if (Visited.count(GV)) return; + if (Visited.count(GV)) + return; // Do we have a circular dependency? 
if (Visiting.count(GV)) @@ -103,12 +98,13 @@ void VisitGlobalVariableForEmission(GlobalVariable *GV, Visiting.insert(GV); // Make sure we visit all dependents first - DenseSet<GlobalVariable*> Others; + DenseSet<GlobalVariable *> Others; for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) DiscoverDependentGlobals(GV->getOperand(i), Others); - - for (DenseSet<GlobalVariable*>::iterator I = Others.begin(), - E = Others.end(); I != E; ++I) + + for (DenseSet<GlobalVariable *>::iterator I = Others.begin(), + E = Others.end(); + I != E; ++I) VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); // Now we can visit ourself @@ -142,25 +138,23 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { if (CE == 0) llvm_unreachable("Unknown constant value to lower!"); - switch (CE->getOpcode()) { default: // If the code isn't optimized, there may be outstanding folding // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. - if (Constant *C = - ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) + if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) if (C != CE) return LowerConstant(C, AP); // Otherwise report the problem to the user. { - std::string S; - raw_string_ostream OS(S); - OS << "Unsupported expression in static initializer: "; - WriteAsOperand(OS, CE, /*PrintType=*/false, - !AP.MF ? 0 : AP.MF->getFunction()->getParent()); - report_fatal_error(OS.str()); + std::string S; + raw_string_ostream OS(S); + OS << "Unsupported expression in static initializer: "; + WriteAsOperand(OS, CE, /*PrintType=*/ false, + !AP.MF ? 0 : AP.MF->getFunction()->getParent()); + report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { const DataLayout &TD = *AP.TM.getDataLayout(); @@ -182,7 +176,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { // expression properly. This is important for differences between // blockaddress labels. 
Since the two labels are in the same function, it // is reasonable to treat their delta as a 32-bit value. - // FALL THROUGH. + // FALL THROUGH. case Instruction::BitCast: return LowerConstant(CE->getOperand(0), AP); @@ -192,7 +186,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { // integer type. This promotes constant folding and simplifies this code. Constant *Op = CE->getOperand(0); Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), - false/*ZExt*/); + false /*ZExt*/); return LowerConstant(Op, AP); } @@ -214,11 +208,12 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { // the high bits so we are sure to get a proper truncation if the input is // a constant expr. unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType()); - const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx); + const MCExpr *MaskExpr = + MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx); return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); } - // The MC library also has a right-shift operator, but it isn't consistently + // The MC library also has a right-shift operator, but it isn't consistently // signed or unsigned between different targets. 
case Instruction::Add: case Instruction::Sub: @@ -232,24 +227,32 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP); const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP); switch (CE->getOpcode()) { - default: llvm_unreachable("Unknown binary operator constant cast expr"); - case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); - case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); - case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); - case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); - case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); - case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); - case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); - case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx); - case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); + default: + llvm_unreachable("Unknown binary operator constant cast expr"); + case Instruction::Add: + return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); + case Instruction::Sub: + return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); + case Instruction::Mul: + return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); + case Instruction::SDiv: + return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); + case Instruction::SRem: + return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); + case Instruction::Shl: + return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); + case Instruction::And: + return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); + case Instruction::Or: + return MCBinaryExpr::CreateOr(LHS, RHS, Ctx); + case Instruction::Xor: + return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); } } } } - -void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) -{ +void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { if (!EmitLineNumbers) return; if (ignoreLoc(MI)) @@ -268,7 +271,6 @@ void 
NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) if (curLoc.isUnknown()) return; - const MachineFunction *MF = MI.getParent()->getParent(); //const TargetMachine &TM = MF->getTarget(); @@ -289,14 +291,13 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) if (filenameMap.find(fileName.str()) == filenameMap.end()) return; - // Emit the line from the source file. if (llvm::InterleaveSrcInPtx) this->emitSrcInText(fileName.str(), curLoc.getLine()); std::stringstream temp; - temp << "\t.loc " << filenameMap[fileName.str()] - << " " << curLoc.getLine() << " " << curLoc.getCol(); + temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine() + << " " << curLoc.getCol(); OutStreamer.EmitRawText(Twine(temp.str().c_str())); } @@ -309,9 +310,7 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitRawText(OS.str()); } -void NVPTXAsmPrinter::printReturnValStr(const Function *F, - raw_ostream &O) -{ +void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { const DataLayout *TD = TM.getDataLayout(); const TargetLowering *TLI = TM.getTargetLowering(); @@ -329,53 +328,49 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, unsigned size = 0; if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) { size = ITy->getBitWidth(); - if (size < 32) size = 32; + if (size < 32) + size = 32; } else { - assert(Ty->isFloatingPointTy() && - "Floating point type expected here"); + assert(Ty->isFloatingPointTy() && "Floating point type expected here"); size = Ty->getPrimitiveSizeInBits(); } O << ".param .b" << size << " func_retval0"; - } - else if (isa<PointerType>(Ty)) { + } else if (isa<PointerType>(Ty)) { O << ".param .b" << TLI->getPointerTy().getSizeInBits() - << " func_retval0"; + << " func_retval0"; } else { - if ((Ty->getTypeID() == Type::StructTyID) || - isa<VectorType>(Ty)) { + if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) { SmallVector<EVT, 16> vtparts; 
ComputeValueVTs(*TLI, Ty, vtparts); unsigned totalsz = 0; - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { elems = vtparts[i].getVectorNumElements(); elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0, je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 8)) sz = 8; - totalsz += sz/8; + if (elemtype.isInteger() && (sz < 8)) + sz = 8; + totalsz += sz / 8; } } unsigned retAlignment = 0; if (!llvm::getAlign(*F, 0, retAlignment)) retAlignment = TD->getABITypeAlignment(Ty); - O << ".param .align " - << retAlignment - << " .b8 func_retval0[" - << totalsz << "]"; + O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz + << "]"; } else - assert(false && - "Unknown return type"); + assert(false && "Unknown return type"); } } else { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*TLI, Ty, vtparts); unsigned idx = 0; - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { @@ -383,14 +378,16 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0, je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) sz = 32; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; O << ".reg .b" << sz << " func_retval" << idx; - if (j<je-1) O << ", "; + if (j < je - 1) + O << ", "; ++idx; } - if (i < e-1) + if (i < e - 1) O << ", "; } } @@ -411,7 +408,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() { // Set up MRI = &MF->getRegInfo(); F = MF->getFunction(); - emitLinkageDirective(F,O); + emitLinkageDirective(F, O); if 
(llvm::isKernelFunction(*F)) O << ".entry "; else { @@ -434,7 +431,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() { void NVPTXAsmPrinter::EmitFunctionBodyStart() { const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); unsigned numRegClasses = TRI.getNumRegClasses(); - VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses+1]; + VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses + 1]; OutStreamer.EmitRawText(StringRef("{\n")); setAndEmitFunctionVirtualRegisters(*MF); @@ -446,54 +443,63 @@ void NVPTXAsmPrinter::EmitFunctionBodyStart() { void NVPTXAsmPrinter::EmitFunctionBodyEnd() { OutStreamer.EmitRawText(StringRef("}\n")); - delete []VRidGlobal2LocalMap; + delete[] VRidGlobal2LocalMap; } - -void -NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function& F, - raw_ostream &O) const { +void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, + raw_ostream &O) const { // If the NVVM IR has some of reqntid* specified, then output // the reqntid directive, and set the unspecified ones to 1. // If none of reqntid* is specified, don't output reqntid directive. 
unsigned reqntidx, reqntidy, reqntidz; bool specified = false; - if (llvm::getReqNTIDx(F, reqntidx) == false) reqntidx = 1; - else specified = true; - if (llvm::getReqNTIDy(F, reqntidy) == false) reqntidy = 1; - else specified = true; - if (llvm::getReqNTIDz(F, reqntidz) == false) reqntidz = 1; - else specified = true; + if (llvm::getReqNTIDx(F, reqntidx) == false) + reqntidx = 1; + else + specified = true; + if (llvm::getReqNTIDy(F, reqntidy) == false) + reqntidy = 1; + else + specified = true; + if (llvm::getReqNTIDz(F, reqntidz) == false) + reqntidz = 1; + else + specified = true; if (specified) - O << ".reqntid " << reqntidx << ", " - << reqntidy << ", " << reqntidz << "\n"; + O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz + << "\n"; // If the NVVM IR has some of maxntid* specified, then output // the maxntid directive, and set the unspecified ones to 1. // If none of maxntid* is specified, don't output maxntid directive. unsigned maxntidx, maxntidy, maxntidz; specified = false; - if (llvm::getMaxNTIDx(F, maxntidx) == false) maxntidx = 1; - else specified = true; - if (llvm::getMaxNTIDy(F, maxntidy) == false) maxntidy = 1; - else specified = true; - if (llvm::getMaxNTIDz(F, maxntidz) == false) maxntidz = 1; - else specified = true; + if (llvm::getMaxNTIDx(F, maxntidx) == false) + maxntidx = 1; + else + specified = true; + if (llvm::getMaxNTIDy(F, maxntidy) == false) + maxntidy = 1; + else + specified = true; + if (llvm::getMaxNTIDz(F, maxntidz) == false) + maxntidz = 1; + else + specified = true; if (specified) - O << ".maxntid " << maxntidx << ", " - << maxntidy << ", " << maxntidz << "\n"; + O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz + << "\n"; unsigned mincta; if (llvm::getMinCTASm(F, mincta)) O << ".minnctapersm " << mincta << "\n"; } -void -NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec, - raw_ostream &O) { - const TargetRegisterClass * RC = MRI->getRegClass(vr); +void 
NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec, + raw_ostream &O) { + const TargetRegisterClass *RC = MRI->getRegClass(vr); unsigned id = RC->getID(); std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[id]; @@ -506,44 +512,38 @@ NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec, report_fatal_error("Bad register!"); } -void -NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec, - raw_ostream &O) { +void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec, + raw_ostream &O) { getVirtualRegisterName(vr, isVec, O); } -void NVPTXAsmPrinter::printVecModifiedImmediate(const MachineOperand &MO, - const char *Modifier, - raw_ostream &O) { - static const char vecelem[] = {'0', '1', '2', '3', '0', '1', '2', '3'}; - int Imm = (int)MO.getImm(); - if(0 == strcmp(Modifier, "vecelem")) +void NVPTXAsmPrinter::printVecModifiedImmediate( + const MachineOperand &MO, const char *Modifier, raw_ostream &O) { + static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' }; + int Imm = (int) MO.getImm(); + if (0 == strcmp(Modifier, "vecelem")) O << "_" << vecelem[Imm]; - else if(0 == strcmp(Modifier, "vecv4comm1")) { - if((Imm < 0) || (Imm > 3)) + else if (0 == strcmp(Modifier, "vecv4comm1")) { + if ((Imm < 0) || (Imm > 3)) O << "//"; - } - else if(0 == strcmp(Modifier, "vecv4comm2")) { - if((Imm < 4) || (Imm > 7)) + } else if (0 == strcmp(Modifier, "vecv4comm2")) { + if ((Imm < 4) || (Imm > 7)) O << "//"; - } - else if(0 == strcmp(Modifier, "vecv4pos")) { - if(Imm < 0) Imm = 0; - O << "_" << vecelem[Imm%4]; - } - else if(0 == strcmp(Modifier, "vecv2comm1")) { - if((Imm < 0) || (Imm > 1)) + } else if (0 == strcmp(Modifier, "vecv4pos")) { + if (Imm < 0) + Imm = 0; + O << "_" << vecelem[Imm % 4]; + } else if (0 == strcmp(Modifier, "vecv2comm1")) { + if ((Imm < 0) || (Imm > 1)) O << "//"; - } - else if(0 == strcmp(Modifier, "vecv2comm2")) { - if((Imm < 2) || (Imm > 3)) + } else if (0 == strcmp(Modifier, "vecv2comm2")) { + 
if ((Imm < 2) || (Imm > 3)) O << "//"; - } - else if(0 == strcmp(Modifier, "vecv2pos")) { - if(Imm < 0) Imm = 0; - O << "_" << vecelem[Imm%2]; - } - else + } else if (0 == strcmp(Modifier, "vecv2pos")) { + if (Imm < 0) + Imm = 0; + O << "_" << vecelem[Imm % 2]; + } else llvm_unreachable("Unknown Modifier on immediate operand"); } @@ -565,7 +565,7 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, emitVirtualRegister(MO.getReg(), true, O); else llvm_unreachable( - "Don't know how to handle the modifier on virtual register."); + "Don't know how to handle the modifier on virtual register."); } } return; @@ -576,7 +576,8 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, else if (strstr(Modifier, "vec") == Modifier) printVecModifiedImmediate(MO, Modifier, O); else - llvm_unreachable("Don't know how to handle modifier on immediate operand"); + llvm_unreachable( + "Don't know how to handle modifier on immediate operand"); return; case MachineOperand::MO_FPImmediate: @@ -588,18 +589,16 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, break; case MachineOperand::MO_ExternalSymbol: { - const char * symbname = MO.getSymbolName(); + const char *symbname = MO.getSymbolName(); if (strstr(symbname, ".PARAM") == symbname) { unsigned index; - sscanf(symbname+6, "%u[];", &index); + sscanf(symbname + 6, "%u[];", &index); printParamName(index, O); - } - else if (strstr(symbname, ".HLPPARAM") == symbname) { + } else if (strstr(symbname, ".HLPPARAM") == symbname) { unsigned index; - sscanf(symbname+9, "%u[];", &index); + sscanf(symbname + 9, "%u[];", &index); O << *CurrentFnSym << "_param_" << index << "_offset"; - } - else + } else O << symbname; break; } @@ -613,8 +612,8 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, } } -void NVPTXAsmPrinter:: -printImplicitDef(const MachineInstr *MI, raw_ostream &O) const { +void NVPTXAsmPrinter::printImplicitDef(const MachineInstr *MI, + raw_ostream &O) 
const { #ifndef __OPTIMIZE__ O << "\t// Implicit def :"; //printOperand(MI, 0); @@ -628,32 +627,41 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, if (Modifier && !strcmp(Modifier, "add")) { O << ", "; - printOperand(MI, opNum+1, O); + printOperand(MI, opNum + 1, O); } else { - if (MI->getOperand(opNum+1).isImm() && - MI->getOperand(opNum+1).getImm() == 0) + if (MI->getOperand(opNum + 1).isImm() && + MI->getOperand(opNum + 1).getImm() == 0) return; // don't print ',0' or '+0' O << "+"; - printOperand(MI, opNum+1, O); + printOperand(MI, opNum + 1, O); } } void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum, - raw_ostream &O, const char *Modifier) -{ + raw_ostream &O, const char *Modifier) { if (Modifier) { const MachineOperand &MO = MI->getOperand(opNum); - int Imm = (int)MO.getImm(); + int Imm = (int) MO.getImm(); if (!strcmp(Modifier, "volatile")) { if (Imm) O << ".volatile"; } else if (!strcmp(Modifier, "addsp")) { switch (Imm) { - case NVPTX::PTXLdStInstCode::GLOBAL: O << ".global"; break; - case NVPTX::PTXLdStInstCode::SHARED: O << ".shared"; break; - case NVPTX::PTXLdStInstCode::LOCAL: O << ".local"; break; - case NVPTX::PTXLdStInstCode::PARAM: O << ".param"; break; - case NVPTX::PTXLdStInstCode::CONSTANT: O << ".const"; break; + case NVPTX::PTXLdStInstCode::GLOBAL: + O << ".global"; + break; + case NVPTX::PTXLdStInstCode::SHARED: + O << ".shared"; + break; + case NVPTX::PTXLdStInstCode::LOCAL: + O << ".local"; + break; + case NVPTX::PTXLdStInstCode::PARAM: + O << ".param"; + break; + case NVPTX::PTXLdStInstCode::CONSTANT: + O << ".const"; + break; case NVPTX::PTXLdStInstCode::GENERIC: if (!nvptxSubtarget.hasGenericLdSt()) O << ".global"; @@ -661,31 +669,27 @@ void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum, default: llvm_unreachable("Wrong Address Space"); } - } - else if (!strcmp(Modifier, "sign")) { - if (Imm==NVPTX::PTXLdStInstCode::Signed) + } else if (!strcmp(Modifier, "sign")) { + if 
(Imm == NVPTX::PTXLdStInstCode::Signed) O << "s"; - else if (Imm==NVPTX::PTXLdStInstCode::Unsigned) + else if (Imm == NVPTX::PTXLdStInstCode::Unsigned) O << "u"; else O << "f"; - } - else if (!strcmp(Modifier, "vec")) { - if (Imm==NVPTX::PTXLdStInstCode::V2) + } else if (!strcmp(Modifier, "vec")) { + if (Imm == NVPTX::PTXLdStInstCode::V2) O << ".v2"; - else if (Imm==NVPTX::PTXLdStInstCode::V4) + else if (Imm == NVPTX::PTXLdStInstCode::V4) O << ".v4"; - } - else + } else llvm_unreachable("Unknown Modifier"); - } - else + } else llvm_unreachable("Empty Modifier"); } -void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) { +void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { - emitLinkageDirective(F,O); + emitLinkageDirective(F, O); if (llvm::isKernelFunction(*F)) O << ".entry "; else @@ -696,8 +700,7 @@ void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) { O << ";\n"; } -static bool usedInGlobalVarDef(const Constant *C) -{ +static bool usedInGlobalVarDef(const Constant *C) { if (!C) return false; @@ -707,8 +710,8 @@ static bool usedInGlobalVarDef(const Constant *C) return true; } - for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end(); - ui!=ue; ++ui) { + for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end(); + ui != ue; ++ui) { const Constant *C = dyn_cast<Constant>(*ui); if (usedInGlobalVarDef(C)) return true; @@ -716,8 +719,7 @@ static bool usedInGlobalVarDef(const Constant *C) return false; } -static bool usedInOneFunc(const User *U, Function const *&oneFunc) -{ +static bool usedInOneFunc(const User *U, Function const *&oneFunc) { if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { if (othergv->getName().str() == "llvm.used") return true; @@ -730,19 +732,17 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) return false; oneFunc = curFunc; return true; - } - else + } else return false; } if (const MDNode *md = dyn_cast<MDNode>(U)) if 
(md->hasName() && ((md->getName().str() == "llvm.dbg.gv") || - (md->getName().str() == "llvm.dbg.sp"))) + (md->getName().str() == "llvm.dbg.sp"))) return true; - - for (User::const_use_iterator ui=U->use_begin(), ue=U->use_end(); - ui!=ue; ++ui) { + for (User::const_use_iterator ui = U->use_begin(), ue = U->use_end(); + ui != ue; ++ui) { if (usedInOneFunc(*ui, oneFunc) == false) return false; } @@ -776,16 +776,18 @@ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { static bool useFuncSeen(const Constant *C, llvm::DenseMap<const Function *, bool> &seenMap) { - for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end(); - ui!=ue; ++ui) { + for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end(); + ui != ue; ++ui) { if (const Constant *cu = dyn_cast<Constant>(*ui)) { if (useFuncSeen(cu, seenMap)) return true; } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) { const BasicBlock *bb = I->getParent(); - if (!bb) continue; + if (!bb) + continue; const Function *caller = bb->getParent(); - if (!caller) continue; + if (!caller) + continue; if (seenMap.find(caller) != seenMap.end()) return true; } @@ -793,10 +795,9 @@ static bool useFuncSeen(const Constant *C, return false; } -void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) { +void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) { llvm::DenseMap<const Function *, bool> seenMap; - for (Module::const_iterator FI=M.begin(), FE=M.end(); - FI!=FE; ++FI) { + for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { const Function *F = FI; if (F->isDeclaration()) { @@ -808,8 +809,9 @@ void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) { emitDeclaration(F, O); continue; } - for (Value::const_use_iterator iter=F->use_begin(), - iterEnd=F->use_end(); iter!=iterEnd; ++iter) { + for (Value::const_use_iterator iter = F->use_begin(), + iterEnd = F->use_end(); + iter != iterEnd; ++iter) { if (const Constant *C = 
dyn_cast<Constant>(*iter)) { if (usedInGlobalVarDef(C)) { // The use is in the initialization of a global variable @@ -828,12 +830,15 @@ void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) { } } - if (!isa<Instruction>(*iter)) continue; + if (!isa<Instruction>(*iter)) + continue; const Instruction *instr = cast<Instruction>(*iter); const BasicBlock *bb = instr->getParent(); - if (!bb) continue; + if (!bb) + continue; const Function *caller = bb->getParent(); - if (!caller) continue; + if (!caller) + continue; // If a caller has already been seen, then the caller is // appearing in the module before the callee. so print out @@ -852,9 +857,10 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { DebugInfoFinder DbgFinder; DbgFinder.processModule(M); - unsigned i=1; + unsigned i = 1; for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), - E = DbgFinder.compile_unit_end(); I != E; ++I) { + E = DbgFinder.compile_unit_end(); + I != E; ++I) { DICompileUnit DIUnit(*I); StringRef Filename(DIUnit.getFilename()); StringRef Dirname(DIUnit.getDirectory()); @@ -871,7 +877,8 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { } for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), - E = DbgFinder.subprogram_end(); I != E; ++I) { + E = DbgFinder.subprogram_end(); + I != E; ++I) { DISubprogram SP(*I); StringRef Filename(SP.getFilename()); StringRef Dirname(SP.getDirectory()); @@ -887,7 +894,7 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { } } -bool NVPTXAsmPrinter::doInitialization (Module &M) { +bool NVPTXAsmPrinter::doInitialization(Module &M) { SmallString<128> Str1; raw_svector_ostream OS1(Str1); @@ -899,8 +906,8 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { //bool Result = AsmPrinter::doInitialization(M); // Initialize TargetLoweringObjectFile. 
- const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) - .Initialize(OutContext, TM); + const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) + .Initialize(OutContext, TM); Mang = new Mangler(OutContext, *TM.getDataLayout()); @@ -908,11 +915,9 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { emitHeader(M, OS1); OutStreamer.EmitRawText(OS1.str()); - // Already commented out //bool Result = AsmPrinter::doInitialization(M); - if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) recordAndEmitFilenames(M); @@ -926,16 +931,16 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { // global variable in order, and ensure that we emit it *after* its dependent // globals. We use a little extra memory maintaining both a set and a list to // have fast searches while maintaining a strict ordering. - SmallVector<GlobalVariable*,8> Globals; - DenseSet<GlobalVariable*> GVVisited; - DenseSet<GlobalVariable*> GVVisiting; + SmallVector<GlobalVariable *, 8> Globals; + DenseSet<GlobalVariable *> GVVisited; + DenseSet<GlobalVariable *> GVVisiting; // Visit each global variable, in order - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; + ++I) VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); - assert(GVVisited.size() == M.getGlobalList().size() && + assert(GVVisited.size() == M.getGlobalList().size() && "Missed a global variable"); assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); @@ -946,10 +951,10 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { OS2 << '\n'; OutStreamer.EmitRawText(OS2.str()); - return false; // success + return false; // success } -void NVPTXAsmPrinter::emitHeader (Module &M, raw_ostream &O) { +void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) { O << "//\n"; O << "// Generated by LLVM NVPTX Back-End\n"; O << "//\n"; @@ -989,12 +994,12 @@ bool 
NVPTXAsmPrinter::doFinalization(Module &M) { Module::GlobalListType &global_list = M.getGlobalList(); int i, n = global_list.size(); - GlobalVariable **gv_array = new GlobalVariable* [n]; + GlobalVariable **gv_array = new GlobalVariable *[n]; // first, back-up GlobalVariable in gv_array i = 0; for (Module::global_iterator I = global_list.begin(), E = global_list.end(); - I != E; ++I) + I != E; ++I) gv_array[i++] = &*I; // second, empty global_list @@ -1005,13 +1010,12 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) { bool ret = AsmPrinter::doFinalization(M); // now we restore global variables - for (i = 0; i < n; i ++) + for (i = 0; i < n; i++) global_list.insert(global_list.end(), gv_array[i]); delete[] gv_array; return ret; - //bool Result = AsmPrinter::doFinalization(M); // Instead of calling the parents doFinalization, we may // clone parents doFinalization and customize here. @@ -1031,8 +1035,8 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) { // external without init -> .extern // appending -> not allowed, assert. 
-void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O) -{ +void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, + raw_ostream &O) { if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { if (V->hasExternalLinkage()) { if (isa<GlobalVariable>(V)) { @@ -1059,8 +1063,7 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O) } } - -void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, +void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool processDemoted) { // Skip meta data @@ -1111,30 +1114,48 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, if (Initializer) CI = dyn_cast<ConstantInt>(Initializer); if (CI) { - unsigned sample=CI->getZExtValue(); + unsigned sample = CI->getZExtValue(); O << " = { "; - for (int i =0, addr=((sample & __CLK_ADDRESS_MASK ) >> - __CLK_ADDRESS_BASE) ; i < 3 ; i++) { + for (int i = 0, + addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE); + i < 3; i++) { O << "addr_mode_" << i << " = "; switch (addr) { - case 0: O << "wrap"; break; - case 1: O << "clamp_to_border"; break; - case 2: O << "clamp_to_edge"; break; - case 3: O << "wrap"; break; - case 4: O << "mirror"; break; + case 0: + O << "wrap"; + break; + case 1: + O << "clamp_to_border"; + break; + case 2: + O << "clamp_to_edge"; + break; + case 3: + O << "wrap"; + break; + case 4: + O << "mirror"; + break; } - O <<", "; + O << ", "; } O << "filter_mode = "; - switch (( sample & __CLK_FILTER_MASK ) >> __CLK_FILTER_BASE ) { - case 0: O << "nearest"; break; - case 1: O << "linear"; break; - case 2: assert ( 0 && "Anisotropic filtering is not supported"); - default: O << "nearest"; break; + switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) { + case 0: + O << "nearest"; + break; + case 1: + O << "linear"; + break; + case 2: + assert(0 && "Anisotropic filtering is not supported"); + default: + O << "nearest"; + break; } - if 
(!(( sample &__CLK_NORMALIZED_MASK ) >> __CLK_NORMALIZED_BASE)) { + if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) { O << ", force_unnormalized_coords = 1"; } O << " }"; @@ -1176,7 +1197,6 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, else O << " .align " << GVar->getAlignment(); - if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { O << " ."; O << getPTXFundamentalTypeStr(ETy, false); @@ -1186,17 +1206,17 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, // Ptx allows variable initilization only for constant and global state // spaces. if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || - (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || - (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) - && GVar->hasInitializer()) { + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && + GVar->hasInitializer()) { Constant *Initializer = GVar->getInitializer(); if (!Initializer->isNullValue()) { - O << " = " ; + O << " = "; printScalarConstant(Initializer, O); } } } else { - unsigned int ElementSize =0; + unsigned int ElementSize = 0; // Although PTX has direct support for struct type and array type and // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for @@ -1210,54 +1230,49 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, // Ptx allows variable initilization only for constant and // global state spaces. 
if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || - (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || - (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) - && GVar->hasInitializer()) { + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && + GVar->hasInitializer()) { Constant *Initializer = GVar->getInitializer(); - if (!isa<UndefValue>(Initializer) && - !Initializer->isNullValue()) { + if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { AggBuffer aggBuffer(ElementSize, O, *this); bufferAggregateConstant(Initializer, &aggBuffer); if (aggBuffer.numSymbols) { if (nvptxSubtarget.is64Bit()) { - O << " .u64 " << *Mang->getSymbol(GVar) <<"[" ; - O << ElementSize/8; - } - else { - O << " .u32 " << *Mang->getSymbol(GVar) <<"[" ; - O << ElementSize/4; + O << " .u64 " << *Mang->getSymbol(GVar) << "["; + O << ElementSize / 8; + } else { + O << " .u32 " << *Mang->getSymbol(GVar) << "["; + O << ElementSize / 4; } O << "]"; - } - else { - O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ; + } else { + O << " .b8 " << *Mang->getSymbol(GVar) << "["; O << ElementSize; O << "]"; } - O << " = {" ; + O << " = {"; aggBuffer.print(); O << "}"; - } - else { - O << " .b8 " << *Mang->getSymbol(GVar) ; + } else { + O << " .b8 " << *Mang->getSymbol(GVar); if (ElementSize) { - O <<"[" ; + O << "["; O << ElementSize; O << "]"; } } - } - else { + } else { O << " .b8 " << *Mang->getSymbol(GVar); if (ElementSize) { - O <<"[" ; + O << "["; O << ElementSize; O << "]"; } } break; default: - assert( 0 && "type not supported yet"); + assert(0 && "type not supported yet"); } } @@ -1270,7 +1285,7 @@ void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { std::vector<GlobalVariable *> &gvars = localDecls[f]; - for (unsigned i=0, e=gvars.size(); i!=e; ++i) { + for (unsigned i = 0, e = gvars.size(); i != e; ++i) { O << "\t// demoted variable\n\t"; 
printModuleLevelGV(gvars[i], O, true); } @@ -1280,24 +1295,24 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const { switch (AddressSpace) { case llvm::ADDRESS_SPACE_LOCAL: - O << "local" ; + O << "local"; break; case llvm::ADDRESS_SPACE_GLOBAL: - O << "global" ; + O << "global"; break; case llvm::ADDRESS_SPACE_CONST: // This logic should be consistent with that in // getCodeAddrSpace() (NVPTXISelDATToDAT.cpp) if (nvptxSubtarget.hasGenericLdSt()) - O << "global" ; + O << "global"; else - O << "const" ; + O << "const"; break; case llvm::ADDRESS_SPACE_CONST_NOT_GEN: - O << "const" ; + O << "const"; break; case llvm::ADDRESS_SPACE_SHARED: - O << "shared" ; + O << "shared"; break; default: report_fatal_error("Bad address space found while emitting PTX"); @@ -1305,8 +1320,8 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, } } -std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, - bool useB4PTR) const { +std::string +NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { switch (Ty->getTypeID()) { default: llvm_unreachable("unexpected type"); @@ -1330,17 +1345,20 @@ std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, return "f64"; case Type::PointerTyID: if (nvptxSubtarget.is64Bit()) - if (useB4PTR) return "b64"; - else return "u64"; + if (useB4PTR) + return "b64"; + else + return "u64"; + else if (useB4PTR) + return "b32"; else - if (useB4PTR) return "b32"; - else return "u32"; + return "u32"; } llvm_unreachable("unexpected type"); return NULL; } -void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar, +void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O) { const DataLayout *TD = TM.getDataLayout(); @@ -1364,7 +1382,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar, return; } - int64_t ElementSize =0; + int64_t ElementSize = 0; // Although PTX has direct support for 
struct type and array type and LLVM IR // is very similar to PTX, the LLVM CodeGen does not support for targets that @@ -1375,22 +1393,19 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar, case Type::ArrayTyID: case Type::VectorTyID: ElementSize = TD->getTypeStoreSize(ETy); - O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ; + O << " .b8 " << *Mang->getSymbol(GVar) << "["; if (ElementSize) { - O << itostr(ElementSize) ; + O << itostr(ElementSize); } O << "]"; break; default: - assert( 0 && "type not supported yet"); + assert(0 && "type not supported yet"); } - return ; + return; } - -static unsigned int -getOpenCLAlignment(const DataLayout *TD, - Type *Ty) { +static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty)) return TD->getPrefTypeAlignment(Ty); @@ -1404,9 +1419,9 @@ getOpenCLAlignment(const DataLayout *TD, unsigned int numE = VTy->getNumElements(); unsigned int alignE = TD->getPrefTypeAlignment(ETy); if (numE == 3) - return 4*alignE; + return 4 * alignE; else - return numE*alignE; + return numE * alignE; } const StructType *STy = dyn_cast<StructType>(Ty); @@ -1414,7 +1429,7 @@ getOpenCLAlignment(const DataLayout *TD, unsigned int alignStruct = 1; // Go through each element of the struct and find the // largest alignment. 
- for (unsigned i=0, e=STy->getNumElements(); i != e; i++) { + for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { Type *ETy = STy->getElementType(i); unsigned int align = getOpenCLAlignment(TD, ETy); if (align > alignStruct) @@ -1458,7 +1473,7 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { } for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) { - if (i==paramIndex) { + if (i == paramIndex) { printParamName(I, paramIndex, O); return; } @@ -1466,8 +1481,7 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { llvm_unreachable("paramIndex out of bound"); } -void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, - raw_ostream &O) { +void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { const DataLayout *TD = TM.getDataLayout(); const AttributeSet &PAL = F->getAttributes(); const TargetLowering *TLI = TM.getTargetLowering(); @@ -1481,7 +1495,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, O << "(\n"; for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { - const Type *Ty = I->getType(); + Type *Ty = I->getType(); if (!first) O << ",\n"; @@ -1496,14 +1510,28 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex; else // Default image is read_only O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex; - } - else // Should be llvm::isSampler(*I) + } else // Should be llvm::isSampler(*I) O << "\t.param .samplerref " << *CurrentFnSym << "_param_" - << paramIndex; + << paramIndex; continue; } - if (PAL.hasAttribute(paramIndex+1, Attribute::ByVal) == false) { + if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) { + if (Ty->isVectorTy()) { + // Just print .param .b8 .align <a> .param[size]; + // <a> = PAL.getparamalignment + // size = typeallocsize of element type + unsigned align = PAL.getParamAlignment(paramIndex + 1); + if (align 
== 0) + align = TD->getABITypeAlignment(Ty); + + unsigned sz = TD->getTypeAllocSize(Ty); + O << "\t.param .align " << align << " .b8 "; + printParamName(I, paramIndex, O); + O << "[" << sz << "]"; + + continue; + } // Just a scalar const PointerType *PTy = dyn_cast<PointerType>(Ty); if (isKernelFunc) { @@ -1514,7 +1542,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) { Type *ETy = PTy->getElementType(); int addrSpace = PTy->getAddressSpace(); - switch(addrSpace) { + switch (addrSpace) { default: O << ".ptr "; break; @@ -1529,15 +1557,14 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, O << ".ptr .global "; break; } - O << ".align " << (int)getOpenCLAlignment(TD, ETy) << " "; + O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " "; } printParamName(I, paramIndex, O); continue; } // non-pointer scalar to kernel func - O << "\t.param ." - << getPTXFundamentalTypeStr(Ty) << " "; + O << "\t.param ." << getPTXFundamentalTypeStr(Ty) << " "; printParamName(I, paramIndex, O); continue; } @@ -1546,9 +1573,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, unsigned sz = 0; if (isa<IntegerType>(Ty)) { sz = cast<IntegerType>(Ty)->getBitWidth(); - if (sz < 32) sz = 32; - } - else if (isa<PointerType>(Ty)) + if (sz < 32) + sz = 32; + } else if (isa<PointerType>(Ty)) sz = thePointerTy.getSizeInBits(); else sz = Ty->getPrimitiveSizeInBits(); @@ -1562,21 +1589,19 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, // param has byVal attribute. 
So should be a pointer const PointerType *PTy = dyn_cast<PointerType>(Ty); - assert(PTy && - "Param with byval attribute should be a pointer type"); + assert(PTy && "Param with byval attribute should be a pointer type"); Type *ETy = PTy->getElementType(); if (isABI || isKernelFunc) { // Just print .param .b8 .align <a> .param[size]; // <a> = PAL.getparamalignment // size = typeallocsize of element type - unsigned align = PAL.getParamAlignment(paramIndex+1); + unsigned align = PAL.getParamAlignment(paramIndex + 1); if (align == 0) align = TD->getABITypeAlignment(ETy); unsigned sz = TD->getTypeAllocSize(ETy); - O << "\t.param .align " << align - << " .b8 "; + O << "\t.param .align " << align << " .b8 "; printParamName(I, paramIndex, O); O << "[" << sz << "]"; continue; @@ -1587,7 +1612,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, // each vector element. SmallVector<EVT, 16> vtparts; ComputeValueVTs(*TLI, ETy, vtparts); - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { @@ -1595,15 +1620,17 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0,je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) sz = 32; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; O << "\t.reg .b" << sz << " "; printParamName(I, paramIndex, O); - if (j<je-1) O << ",\n"; + if (j < je - 1) + O << ",\n"; ++paramIndex; } - if (i<e-1) + if (i < e - 1) O << ",\n"; } --paramIndex; @@ -1620,9 +1647,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF, emitFunctionParamList(F, O); } - -void NVPTXAsmPrinter:: -setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) { +void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( + const 
MachineFunction &MF) { SmallString<128> Str; raw_svector_ostream O(Str); @@ -1635,14 +1661,12 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) { const MachineFrameInfo *MFI = MF.getFrameInfo(); int NumBytes = (int) MFI->getStackSize(); if (NumBytes) { - O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" - << DEPOTNAME - << getFunctionNumber() << "[" << NumBytes << "];\n"; + O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME + << getFunctionNumber() << "[" << NumBytes << "];\n"; if (nvptxSubtarget.is64Bit()) { O << "\t.reg .b64 \t%SP;\n"; O << "\t.reg .b64 \t%SPL;\n"; - } - else { + } else { O << "\t.reg .b32 \t%SP;\n"; O << "\t.reg .b32 \t%SPL;\n"; } @@ -1653,12 +1677,12 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) { // register number and the per class virtual register number. // We use the per class virtual register number in the ptx output. unsigned int numVRs = MRI->getNumVirtRegs(); - for (unsigned i=0; i< numVRs; i++) { + for (unsigned i = 0; i < numVRs; i++) { unsigned int vr = TRI->index2VirtReg(i); const TargetRegisterClass *RC = MRI->getRegClass(vr); std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[RC->getID()]; int n = regmap.size(); - regmap.insert(std::make_pair(vr, n+1)); + regmap.insert(std::make_pair(vr, n + 1)); } // Emit register declarations @@ -1702,23 +1726,20 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) { OutStreamer.EmitRawText(O.str()); } - void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { - APFloat APF = APFloat(Fp->getValueAPF()); // make a copy + APFloat APF = APFloat(Fp->getValueAPF()); // make a copy bool ignored; unsigned int numHex; const char *lead; - if (Fp->getType()->getTypeID()==Type::FloatTyID) { + if (Fp->getType()->getTypeID() == Type::FloatTyID) { numHex = 8; lead = "0f"; - APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, - &ignored); + APF.convert(APFloat::IEEEsingle, 
APFloat::rmNearestTiesToEven, &ignored); } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) { numHex = 16; lead = "0d"; - APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, - &ignored); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); } else llvm_unreachable("unsupported fp type"); @@ -1760,7 +1781,6 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) { llvm_unreachable("Not scalar type found in printScalarConstant()"); } - void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer) { @@ -1768,7 +1788,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, if (isa<UndefValue>(CPV) || CPV->isNullValue()) { int s = TD->getTypeAllocSize(CPV->getType()); - if (s<Bytes) + if (s < Bytes) s = Bytes; aggBuffer->addZeros(s); return; @@ -1779,28 +1799,26 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, case Type::IntegerTyID: { const Type *ETy = CPV->getType(); - if ( ETy == Type::getInt8Ty(CPV->getContext()) ){ + if (ETy == Type::getInt8Ty(CPV->getContext())) { unsigned char c = (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); ptr = &c; aggBuffer->addBytes(ptr, 1, Bytes); - } else if ( ETy == Type::getInt16Ty(CPV->getContext()) ) { - short int16 = - (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); - ptr = (unsigned char*)&int16; + } else if (ETy == Type::getInt16Ty(CPV->getContext())) { + short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); + ptr = (unsigned char *)&int16; aggBuffer->addBytes(ptr, 2, Bytes); - } else if ( ETy == Type::getInt32Ty(CPV->getContext()) ) { + } else if (ETy == Type::getInt32Ty(CPV->getContext())) { if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { - int int32 =(int)(constInt->getZExtValue()); - ptr = (unsigned char*)&int32; + int int32 = (int)(constInt->getZExtValue()); + ptr = (unsigned char *)&int32; aggBuffer->addBytes(ptr, 4, Bytes); break; } else if (ConstantExpr *Cexpr = 
dyn_cast<ConstantExpr>(CPV)) { - if (ConstantInt *constInt = - dyn_cast<ConstantInt>(ConstantFoldConstantExpression( - Cexpr, TD))) { - int int32 =(int)(constInt->getZExtValue()); - ptr = (unsigned char*)&int32; + if (ConstantInt *constInt = dyn_cast<ConstantInt>( + ConstantFoldConstantExpression(Cexpr, TD))) { + int int32 = (int)(constInt->getZExtValue()); + ptr = (unsigned char *)&int32; aggBuffer->addBytes(ptr, 4, Bytes); break; } @@ -1812,17 +1830,17 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, } } llvm_unreachable("unsupported integer const type"); - } else if (ETy == Type::getInt64Ty(CPV->getContext()) ) { + } else if (ETy == Type::getInt64Ty(CPV->getContext())) { if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { - long long int64 =(long long)(constInt->getZExtValue()); - ptr = (unsigned char*)&int64; + long long int64 = (long long)(constInt->getZExtValue()); + ptr = (unsigned char *)&int64; aggBuffer->addBytes(ptr, 8, Bytes); break; } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { if (ConstantInt *constInt = dyn_cast<ConstantInt>( - ConstantFoldConstantExpression(Cexpr, TD))) { - long long int64 =(long long)(constInt->getZExtValue()); - ptr = (unsigned char*)&int64; + ConstantFoldConstantExpression(Cexpr, TD))) { + long long int64 = (long long)(constInt->getZExtValue()); + ptr = (unsigned char *)&int64; aggBuffer->addBytes(ptr, 8, Bytes); break; } @@ -1841,17 +1859,16 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, case Type::FloatTyID: case Type::DoubleTyID: { ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); - const Type* Ty = CFP->getType(); + const Type *Ty = CFP->getType(); if (Ty == Type::getFloatTy(CPV->getContext())) { - float float32 = (float)CFP->getValueAPF().convertToFloat(); - ptr = (unsigned char*)&float32; + float float32 = (float) CFP->getValueAPF().convertToFloat(); + ptr = (unsigned char *)&float32; aggBuffer->addBytes(ptr, 4, Bytes); } else if (Ty == Type::getDoubleTy(CPV->getContext())) 
{ double float64 = CFP->getValueAPF().convertToDouble(); - ptr = (unsigned char*)&float64; + ptr = (unsigned char *)&float64; aggBuffer->addBytes(ptr, 8, Bytes); - } - else { + } else { llvm_unreachable("unsupported fp const type"); } break; @@ -1859,8 +1876,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, case Type::PointerTyID: { if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { aggBuffer->addSymbol(GVar); - } - else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { + } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { Value *v = Cexpr->stripPointerCasts(); aggBuffer->addSymbol(v); } @@ -1876,10 +1892,9 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, isa<ConstantStruct>(CPV)) { int ElementSize = TD->getTypeAllocSize(CPV->getType()); bufferAggregateConstant(CPV, aggBuffer); - if ( Bytes > ElementSize ) - aggBuffer->addZeros(Bytes-ElementSize); - } - else if (isa<ConstantAggregateZero>(CPV)) + if (Bytes > ElementSize) + aggBuffer->addZeros(Bytes - ElementSize); + } else if (isa<ConstantAggregateZero>(CPV)) aggBuffer->addZeros(Bytes); else llvm_unreachable("Unexpected Constant type"); @@ -1905,7 +1920,7 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV, } if (const ConstantDataSequential *CDS = - dyn_cast<ConstantDataSequential>(CPV)) { + dyn_cast<ConstantDataSequential>(CPV)) { if (CDS->getNumElements()) for (unsigned i = 0; i < CDS->getNumElements(); ++i) bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0, @@ -1913,20 +1928,18 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV, return; } - if (isa<ConstantStruct>(CPV)) { if (CPV->getNumOperands()) { StructType *ST = cast<StructType>(CPV->getType()); for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { - if ( i == (e - 1)) + if (i == (e - 1)) Bytes = TD->getStructLayout(ST)->getElementOffset(0) + - TD->getTypeAllocSize(ST) - - TD->getStructLayout(ST)->getElementOffset(i); + TD->getTypeAllocSize(ST) - + 
TD->getStructLayout(ST)->getElementOffset(i); else - Bytes = TD->getStructLayout(ST)->getElementOffset(i+1) - - TD->getStructLayout(ST)->getElementOffset(i); - bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, - aggBuffer); + Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) - + TD->getStructLayout(ST)->getElementOffset(i); + bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer); } } return; @@ -1937,15 +1950,13 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV, // buildTypeNameMap - Run through symbol table looking for type names. // - bool NVPTXAsmPrinter::isImageType(const Type *Ty) { std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty); - if (PI != TypeNameMap.end() && - (!PI->second.compare("struct._image1d_t") || - !PI->second.compare("struct._image2d_t") || - !PI->second.compare("struct._image3d_t"))) + if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") || + !PI->second.compare("struct._image2d_t") || + !PI->second.compare("struct._image3d_t"))) return true; return false; @@ -1955,10 +1966,10 @@ bool NVPTXAsmPrinter::isImageType(const Type *Ty) { /// bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { + const char *ExtraCode, raw_ostream &O) { if (ExtraCode && ExtraCode[0]) { - if (ExtraCode[1] != 0) return true; // Unknown modifier. + if (ExtraCode[1] != 0) + return true; // Unknown modifier. 
switch (ExtraCode[0]) { default: @@ -1974,13 +1985,11 @@ bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, return false; } -bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNo, - unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { +bool NVPTXAsmPrinter::PrintAsmMemoryOperand( + const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier + return true; // Unknown modifier O << '['; printMemOperand(MI, OpNo, O); @@ -1989,41 +1998,69 @@ bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } -bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) -{ - switch(MI.getOpcode()) { +bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { + switch (MI.getOpcode()) { default: return false; - case NVPTX::CallArgBeginInst: case NVPTX::CallArgEndInst0: - case NVPTX::CallArgEndInst1: case NVPTX::CallArgF32: - case NVPTX::CallArgF64: case NVPTX::CallArgI16: - case NVPTX::CallArgI32: case NVPTX::CallArgI32imm: - case NVPTX::CallArgI64: case NVPTX::CallArgI8: - case NVPTX::CallArgParam: case NVPTX::CallVoidInst: - case NVPTX::CallVoidInstReg: case NVPTX::Callseq_End: + case NVPTX::CallArgBeginInst: + case NVPTX::CallArgEndInst0: + case NVPTX::CallArgEndInst1: + case NVPTX::CallArgF32: + case NVPTX::CallArgF64: + case NVPTX::CallArgI16: + case NVPTX::CallArgI32: + case NVPTX::CallArgI32imm: + case NVPTX::CallArgI64: + case NVPTX::CallArgI8: + case NVPTX::CallArgParam: + case NVPTX::CallVoidInst: + case NVPTX::CallVoidInstReg: + case NVPTX::Callseq_End: case NVPTX::CallVoidInstReg64: - case NVPTX::DeclareParamInst: case NVPTX::DeclareRetMemInst: - case NVPTX::DeclareRetRegInst: case NVPTX::DeclareRetScalarInst: - case NVPTX::DeclareScalarParamInst: case NVPTX::DeclareScalarRegInst: - case NVPTX::StoreParamF32: case NVPTX::StoreParamF64: - case NVPTX::StoreParamI16: case 
NVPTX::StoreParamI32: - case NVPTX::StoreParamI64: case NVPTX::StoreParamI8: - case NVPTX::StoreParamS32I8: case NVPTX::StoreParamU32I8: - case NVPTX::StoreParamS32I16: case NVPTX::StoreParamU32I16: - case NVPTX::StoreRetvalF32: case NVPTX::StoreRetvalF64: - case NVPTX::StoreRetvalI16: case NVPTX::StoreRetvalI32: - case NVPTX::StoreRetvalI64: case NVPTX::StoreRetvalI8: - case NVPTX::LastCallArgF32: case NVPTX::LastCallArgF64: - case NVPTX::LastCallArgI16: case NVPTX::LastCallArgI32: - case NVPTX::LastCallArgI32imm: case NVPTX::LastCallArgI64: - case NVPTX::LastCallArgI8: case NVPTX::LastCallArgParam: - case NVPTX::LoadParamMemF32: case NVPTX::LoadParamMemF64: - case NVPTX::LoadParamMemI16: case NVPTX::LoadParamMemI32: - case NVPTX::LoadParamMemI64: case NVPTX::LoadParamMemI8: - case NVPTX::LoadParamRegF32: case NVPTX::LoadParamRegF64: - case NVPTX::LoadParamRegI16: case NVPTX::LoadParamRegI32: - case NVPTX::LoadParamRegI64: case NVPTX::LoadParamRegI8: - case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE: + case NVPTX::DeclareParamInst: + case NVPTX::DeclareRetMemInst: + case NVPTX::DeclareRetRegInst: + case NVPTX::DeclareRetScalarInst: + case NVPTX::DeclareScalarParamInst: + case NVPTX::DeclareScalarRegInst: + case NVPTX::StoreParamF32: + case NVPTX::StoreParamF64: + case NVPTX::StoreParamI16: + case NVPTX::StoreParamI32: + case NVPTX::StoreParamI64: + case NVPTX::StoreParamI8: + case NVPTX::StoreParamS32I8: + case NVPTX::StoreParamU32I8: + case NVPTX::StoreParamS32I16: + case NVPTX::StoreParamU32I16: + case NVPTX::StoreRetvalF32: + case NVPTX::StoreRetvalF64: + case NVPTX::StoreRetvalI16: + case NVPTX::StoreRetvalI32: + case NVPTX::StoreRetvalI64: + case NVPTX::StoreRetvalI8: + case NVPTX::LastCallArgF32: + case NVPTX::LastCallArgF64: + case NVPTX::LastCallArgI16: + case NVPTX::LastCallArgI32: + case NVPTX::LastCallArgI32imm: + case NVPTX::LastCallArgI64: + case NVPTX::LastCallArgI8: + case NVPTX::LastCallArgParam: + case NVPTX::LoadParamMemF32: + case 
NVPTX::LoadParamMemF64: + case NVPTX::LoadParamMemI16: + case NVPTX::LoadParamMemI32: + case NVPTX::LoadParamMemI64: + case NVPTX::LoadParamMemI8: + case NVPTX::LoadParamRegF32: + case NVPTX::LoadParamRegF64: + case NVPTX::LoadParamRegI16: + case NVPTX::LoadParamRegI32: + case NVPTX::LoadParamRegI64: + case NVPTX::LoadParamRegI8: + case NVPTX::PrototypeInst: + case NVPTX::DBG_VALUE: return true; } return false; @@ -2035,10 +2072,9 @@ extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() { RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); } - void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { std::stringstream temp; - LineReader * reader = this->getReader(filename.str()); + LineReader *reader = this->getReader(filename.str()); temp << "\n//"; temp << filename.str(); temp << ":"; @@ -2049,29 +2085,26 @@ void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { this->OutStreamer.EmitRawText(Twine(temp.str())); } - LineReader *NVPTXAsmPrinter::getReader(std::string filename) { - if (reader == NULL) { - reader = new LineReader(filename); + if (reader == NULL) { + reader = new LineReader(filename); } if (reader->fileName() != filename) { delete reader; - reader = new LineReader(filename); + reader = new LineReader(filename); } return reader; } - -std::string -LineReader::readLine(unsigned lineNum) { +std::string LineReader::readLine(unsigned lineNum) { if (lineNum < theCurLine) { theCurLine = 0; - fstr.seekg(0,std::ios::beg); + fstr.seekg(0, std::ios::beg); } while (theCurLine < lineNum) { - fstr.getline(buff,500); + fstr.getline(buff, 500); theCurLine++; } return buff; diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index 42498f0bf7..6dc9fc0ffe 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -43,15 +43,15 @@ // This is defined in AsmPrinter.cpp. // Used to process the constant expressions in initializers. 
namespace nvptx { -const llvm::MCExpr *LowerConstant(const llvm::Constant *CV, - llvm::AsmPrinter &AP) ; +const llvm::MCExpr * +LowerConstant(const llvm::Constant *CV, llvm::AsmPrinter &AP); } namespace llvm { class LineReader { private: - unsigned theCurLine ; + unsigned theCurLine; std::ifstream fstr; char buff[512]; std::string theFileName; @@ -63,17 +63,12 @@ public: theFileName = filename; } std::string fileName() { return theFileName; } - ~LineReader() { - fstr.close(); - } + ~LineReader() { fstr.close(); } std::string readLine(unsigned line); }; - - class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { - class AggBuffer { // Used to buffer the emitted string for initializing global // aggregates. @@ -92,7 +87,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { // Once we have this AggBuffer setup, we can choose how to print // it out. public: - unsigned size; // size of the buffer in bytes + unsigned size; // size of the buffer in bytes unsigned char *buffer; // the buffer unsigned numSymbols; // number of symbol addresses SmallVector<unsigned, 4> symbolPosInBuffer; @@ -105,33 +100,31 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { public: AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP) - :O(_O),AP(_AP) { + : O(_O), AP(_AP) { buffer = new unsigned char[_size]; size = _size; curpos = 0; numSymbols = 0; } - ~AggBuffer() { - delete [] buffer; - } + ~AggBuffer() { delete[] buffer; } unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) { - assert((curpos+Num) <= size); - assert((curpos+Bytes) <= size); - for ( int i= 0; i < Num; ++i) { + assert((curpos + Num) <= size); + assert((curpos + Bytes) <= size); + for (int i = 0; i < Num; ++i) { buffer[curpos] = Ptr[i]; - curpos ++; + curpos++; } - for ( int i=Num; i < Bytes ; ++i) { + for (int i = Num; i < Bytes; ++i) { buffer[curpos] = 0; - curpos ++; + curpos++; } return curpos; } unsigned addZeros(int Num) { - assert((curpos+Num) <= 
size); - for ( int i= 0; i < Num; ++i) { + assert((curpos + Num) <= size); + for (int i = 0; i < Num; ++i) { buffer[curpos] = 0; - curpos ++; + curpos++; } return curpos; } @@ -143,10 +136,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { void print() { if (numSymbols == 0) { // print out in bytes - for (unsigned i=0; i<size; i++) { + for (unsigned i = 0; i < size; i++) { if (i) O << ", "; - O << (unsigned int)buffer[i]; + O << (unsigned int) buffer[i]; } } else { // print out in 4-bytes or 8-bytes @@ -156,7 +149,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { unsigned int nBytes = 4; if (AP.nvptxSubtarget.is64Bit()) nBytes = 8; - for (pos=0; pos<size; pos+=nBytes) { + for (pos = 0; pos < size; pos += nBytes) { if (pos) O << ", "; if (pos == nextSymbolPos) { @@ -164,22 +157,19 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { MCSymbol *Name = AP.Mang->getSymbol(GVar); O << *Name; - } - else if (ConstantExpr *Cexpr = - dyn_cast<ConstantExpr>(v)) { + } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) { O << *nvptx::LowerConstant(Cexpr, AP); } else llvm_unreachable("symbol type unknown"); nSym++; if (nSym >= numSymbols) - nextSymbolPos = size+1; + nextSymbolPos = size + 1; else nextSymbolPos = symbolPosInBuffer[nSym]; - } else - if (nBytes == 4) - O << *(unsigned int*)(buffer+pos); - else - O << *(unsigned long long*)(buffer+pos); + } else if (nBytes == 4) + O << *(unsigned int *)(buffer + pos); + else + O << *(unsigned long long *)(buffer + pos); } } } @@ -189,10 +179,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { virtual void emitSrcInText(StringRef filename, unsigned line); -private : - virtual const char *getPassName() const { - return "NVPTX Assembly Printer"; - } +private: + virtual const char *getPassName() const { return "NVPTX Assembly Printer"; } const Function *F; std::string CurrentFnName; @@ -207,31 
+195,28 @@ private : void printGlobalVariable(const GlobalVariable *GVar); void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier=0); + const char *Modifier = 0); void printLdStCode(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier=0); - void printVecModifiedImmediate(const MachineOperand &MO, - const char *Modifier, raw_ostream &O); + const char *Modifier = 0); + void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier, + raw_ostream &O); void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier=0); + const char *Modifier = 0); void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const; // definition autogenerated. void printInstruction(const MachineInstr *MI, raw_ostream &O); - void printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, - bool=false); + void printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool = false); void printParamName(int paramIndex, raw_ostream &O); void printParamName(Function::const_arg_iterator I, int paramIndex, raw_ostream &O); void emitHeader(Module &M, raw_ostream &O); - void emitKernelFunctionDirectives(const Function& F, - raw_ostream &O) const; + void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const; void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O); void emitFunctionExternParamList(const MachineFunction &MF); void emitFunctionParamList(const Function *, raw_ostream &O); void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O); void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF); - void emitFunctionTempData(const MachineFunction &MF, - unsigned &FrameSize); + void emitFunctionTempData(const MachineFunction &MF, unsigned &FrameSize); bool isImageType(const Type *Ty); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, @@ -269,17 +254,16 @@ private: void 
recordAndEmitFilenames(Module &); void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O); - void emitPTXAddressSpace(unsigned int AddressSpace, - raw_ostream &O) const; - std::string getPTXFundamentalTypeStr(const Type *Ty, bool=true) const ; - void printScalarConstant(Constant *CPV, raw_ostream &O) ; - void printFPConstant(const ConstantFP *Fp, raw_ostream &O) ; - void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer) ; - void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer) ; + void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const; + std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const; + void printScalarConstant(Constant *CPV, raw_ostream &O); + void printFPConstant(const ConstantFP *Fp, raw_ostream &O); + void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer); + void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer); void printOperandProper(const MachineOperand &MO); - void emitLinkageDirective(const GlobalValue* V, raw_ostream &O); + void emitLinkageDirective(const GlobalValue *V, raw_ostream &O); void emitDeclarations(Module &, raw_ostream &O); void emitDeclaration(const Function *, raw_ostream &O); @@ -289,10 +273,9 @@ private: LineReader *reader; LineReader *getReader(std::string); public: - NVPTXAsmPrinter(TargetMachine &TM, - MCStreamer &Streamer) - : AsmPrinter(TM, Streamer), - nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { + NVPTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer), + nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { CurrentBankselLabelInBasicBlock = ""; VRidGlobal2LocalMap = NULL; reader = NULL; diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp index bb2c55ceed..6533da5102 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp +++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp @@ -25,9 +25,7 @@ using namespace llvm; -bool NVPTXFrameLowering::hasFP(const 
MachineFunction &MF) const { - return true; -} +bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; } void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const { if (MF.getFrameInfo()->hasStackObjects()) { @@ -42,46 +40,39 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const { // mov %SPL, %depot; // cvta.local %SP, %SPL; if (is64bit) { - MachineInstr *MI = BuildMI(MBB, MBBI, dl, - tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64), - NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal); - BuildMI(MBB, MI, dl, - tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrameLocal) - .addReg(NVPTX::VRDepot); + MachineInstr *MI = BuildMI( + MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64), + NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal); + BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr), + NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot); } else { - MachineInstr *MI = BuildMI(MBB, MBBI, dl, - tm.getInstrInfo()->get(NVPTX::cvta_local_yes), - NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal); - BuildMI(MBB, MI, dl, - tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrameLocal) - .addReg(NVPTX::VRDepot); + MachineInstr *MI = BuildMI( + MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes), + NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal); + BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr), + NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot); } - } - else { + } else { // mov %SP, %depot; if (is64bit) - BuildMI(MBB, MBBI, dl, - tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrame) - .addReg(NVPTX::VRDepot); + BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr), + NVPTX::VRFrame).addReg(NVPTX::VRDepot); else - BuildMI(MBB, MBBI, dl, - tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrame) - .addReg(NVPTX::VRDepot); + BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr), + NVPTX::VRFrame).addReg(NVPTX::VRDepot); } } } void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF, - 
MachineBasicBlock &MBB) const { -} + MachineBasicBlock &MBB) const {} // This function eliminates ADJCALLSTACKDOWN, // ADJCALLSTACKUP pseudo instructions -void NVPTXFrameLowering:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { +void NVPTXFrameLowering::eliminateCallFramePseudoInstr( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { // Simply discard ADJCALLSTACKDOWN, // ADJCALLSTACKUP instructions. MBB.erase(I); } - diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h index d34e7bec1d..819f1dd3f4 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.h +++ b/lib/Target/NVPTX/NVPTXFrameLowering.h @@ -16,7 +16,6 @@ #include "llvm/Target/TargetFrameLowering.h" - namespace llvm { class NVPTXTargetMachine; @@ -26,13 +25,12 @@ class NVPTXFrameLowering : public TargetFrameLowering { public: explicit NVPTXFrameLowering(NVPTXTargetMachine &_tm, bool _is64bit) - : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), - tm(_tm), is64bit(_is64bit) {} + : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), tm(_tm), + is64bit(_is64bit) {} virtual bool hasFP(const MachineFunction &MF) const; virtual void emitPrologue(MachineFunction &MF) const; - virtual void emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const; + virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 481f13afd1..e862988c85 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "NVPTXISelDAGToDAG.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" @@ -26,27 +25,22 @@ using 
namespace llvm; - -static cl::opt<bool> -UseFMADInstruction("nvptx-mad-enable", - cl::ZeroOrMore, - cl::desc("NVPTX Specific: Enable generating FMAD instructions"), - cl::init(false)); +static cl::opt<bool> UseFMADInstruction( + "nvptx-mad-enable", cl::ZeroOrMore, + cl::desc("NVPTX Specific: Enable generating FMAD instructions"), + cl::init(false)); static cl::opt<int> -FMAContractLevel("nvptx-fma-level", - cl::ZeroOrMore, +FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore, cl::desc("NVPTX Specific: FMA contraction (0: don't do it" - " 1: do it 2: do it aggressively"), - cl::init(2)); - + " 1: do it 2: do it aggressively"), + cl::init(2)); -static cl::opt<int> -UsePrecDivF32("nvptx-prec-divf32", - cl::ZeroOrMore, - cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" - " IEEE Compliant F32 div.rnd if avaiable."), - cl::init(2)); +static cl::opt<int> UsePrecDivF32( + "nvptx-prec-divf32", cl::ZeroOrMore, + cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" + " IEEE Compliant F32 div.rnd if avaiable."), + cl::init(2)); /// createNVPTXISelDag - This pass converts a legalized DAG into a /// NVPTX-specific DAG, ready for instruction scheduling. @@ -55,26 +49,22 @@ FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM, return new NVPTXDAGToDAGISel(TM, OptLevel); } - NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, CodeGenOpt::Level OptLevel) -: SelectionDAGISel(tm, OptLevel), - Subtarget(tm.getSubtarget<NVPTXSubtarget>()) -{ + : SelectionDAGISel(tm, OptLevel), + Subtarget(tm.getSubtarget<NVPTXSubtarget>()) { // Always do fma.f32 fpcontract if the target supports the instruction. // Always do fma.f64 fpcontract if the target supports the instruction. // Do mad.f32 is nvptx-mad-enable is specified and the target does not // support fma.f32. 
doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32(); - doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && - (FMAContractLevel>=1); - doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && - (FMAContractLevel>=1); - doFMAF32AGG = (OptLevel > 0) && Subtarget.hasFMAF32() && - (FMAContractLevel==2); - doFMAF64AGG = (OptLevel > 0) && Subtarget.hasFMAF64() && - (FMAContractLevel==2); + doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1); + doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1); + doFMAF32AGG = + (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2); + doFMAF64AGG = + (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2); allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction; @@ -92,10 +82,10 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, /// Select - Select instructions not customized! Used for /// expanded, promoted and normal instructions. -SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) { +SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) { if (N->isMachineOpcode()) - return NULL; // Already selected. + return NULL; // Already selected. 
SDNode *ResNode = NULL; switch (N->getOpcode()) { @@ -119,30 +109,34 @@ SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) { case NVPTXISD::StoreV4: ResNode = SelectStoreVector(N); break; - default: break; + default: + break; } if (ResNode) return ResNode; return SelectCode(N); } - -static unsigned int -getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget) -{ +static unsigned int getCodeAddrSpace(MemSDNode *N, + const NVPTXSubtarget &Subtarget) { const Value *Src = N->getSrcValue(); if (!Src) return NVPTX::PTXLdStInstCode::LOCAL; if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) { switch (PT->getAddressSpace()) { - case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL; - case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL; - case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED; + case llvm::ADDRESS_SPACE_LOCAL: + return NVPTX::PTXLdStInstCode::LOCAL; + case llvm::ADDRESS_SPACE_GLOBAL: + return NVPTX::PTXLdStInstCode::GLOBAL; + case llvm::ADDRESS_SPACE_SHARED: + return NVPTX::PTXLdStInstCode::SHARED; case llvm::ADDRESS_SPACE_CONST_NOT_GEN: return NVPTX::PTXLdStInstCode::CONSTANT; - case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC; - case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM; + case llvm::ADDRESS_SPACE_GENERIC: + return NVPTX::PTXLdStInstCode::GENERIC; + case llvm::ADDRESS_SPACE_PARAM: + return NVPTX::PTXLdStInstCode::PARAM; case llvm::ADDRESS_SPACE_CONST: // If the arch supports generic address space, translate it to GLOBAL // for correctness. 
@@ -153,18 +147,18 @@ getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget) return NVPTX::PTXLdStInstCode::GLOBAL; else return NVPTX::PTXLdStInstCode::CONSTANT; - default: break; + default: + break; } } return NVPTX::PTXLdStInstCode::LOCAL; } - -SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { +SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { DebugLoc dl = N->getDebugLoc(); LoadSDNode *LD = cast<LoadSDNode>(N); EVT LoadedVT = LD->getMemoryVT(); - SDNode *NVPTXLD= NULL; + SDNode *NVPTXLD = NULL; // do not support pre/post inc/dec if (LD->isIndexed()) @@ -204,7 +198,7 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { // type is integer // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float MVT ScalarVT = SimpleVT.getScalarType(); - unsigned fromTypeWidth = ScalarVT.getSizeInBits(); + unsigned fromTypeWidth = ScalarVT.getSizeInBits(); unsigned int fromType; if ((LD->getExtensionType() == ISD::SEXTLOAD)) fromType = NVPTX::PTXLdStInstCode::Signed; @@ -223,105 +217,166 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { if (SelectDirectAddr(N1, Addr)) { switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_avar; break; - case MVT::i16: Opcode = NVPTX::LD_i16_avar; break; - case MVT::i32: Opcode = NVPTX::LD_i32_avar; break; - case MVT::i64: Opcode = NVPTX::LD_i64_avar; break; - case MVT::f32: Opcode = NVPTX::LD_f32_avar; break; - case MVT::f64: Opcode = NVPTX::LD_f64_avar; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::LD_i8_avar; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_avar; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_avar; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_avar; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_avar; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_avar; + break; + default: + return NULL; } - SDValue Ops[] = { getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(fromType), - getI32Imm(fromTypeWidth), - Addr, Chain }; - NVPTXLD = 
CurDAG->getMachineNode(Opcode, dl, TargetVT, - MVT::Other, Ops, 7); - } else if (Subtarget.is64Bit()? - SelectADDRsi64(N1.getNode(), N1, Base, Offset): - SelectADDRsi(N1.getNode(), N1, Base, Offset)) { + SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(fromType), + getI32Imm(fromTypeWidth), Addr, Chain }; + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7); + } else if (Subtarget.is64Bit() + ? SelectADDRsi64(N1.getNode(), N1, Base, Offset) + : SelectADDRsi(N1.getNode(), N1, Base, Offset)) { switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_asi; break; - case MVT::i16: Opcode = NVPTX::LD_i16_asi; break; - case MVT::i32: Opcode = NVPTX::LD_i32_asi; break; - case MVT::i64: Opcode = NVPTX::LD_i64_asi; break; - case MVT::f32: Opcode = NVPTX::LD_f32_asi; break; - case MVT::f64: Opcode = NVPTX::LD_f64_asi; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::LD_i8_asi; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_asi; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_asi; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_asi; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_asi; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_asi; + break; + default: + return NULL; } - SDValue Ops[] = { getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(fromType), - getI32Imm(fromTypeWidth), - Base, Offset, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, - MVT::Other, Ops, 8); - } else if (Subtarget.is64Bit()? - SelectADDRri64(N1.getNode(), N1, Base, Offset): - SelectADDRri(N1.getNode(), N1, Base, Offset)) { + SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(fromType), + getI32Imm(fromTypeWidth), Base, Offset, Chain }; + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8); + } else if (Subtarget.is64Bit() + ? 
SelectADDRri64(N1.getNode(), N1, Base, Offset) + : SelectADDRri(N1.getNode(), N1, Base, Offset)) { if (Subtarget.is64Bit()) { switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_ari_64; break; - case MVT::i16: Opcode = NVPTX::LD_i16_ari_64; break; - case MVT::i32: Opcode = NVPTX::LD_i32_ari_64; break; - case MVT::i64: Opcode = NVPTX::LD_i64_ari_64; break; - case MVT::f32: Opcode = NVPTX::LD_f32_ari_64; break; - case MVT::f64: Opcode = NVPTX::LD_f64_ari_64; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::LD_i8_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_ari_64; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_ari_64; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_ari_64; + break; + default: + return NULL; } } else { switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_ari; break; - case MVT::i16: Opcode = NVPTX::LD_i16_ari; break; - case MVT::i32: Opcode = NVPTX::LD_i32_ari; break; - case MVT::i64: Opcode = NVPTX::LD_i64_ari; break; - case MVT::f32: Opcode = NVPTX::LD_f32_ari; break; - case MVT::f64: Opcode = NVPTX::LD_f64_ari; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::LD_i8_ari; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_ari; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_ari; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_ari; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_ari; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_ari; + break; + default: + return NULL; } } - SDValue Ops[] = { getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(fromType), - getI32Imm(fromTypeWidth), - Base, Offset, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, - MVT::Other, Ops, 8); - } - else { + SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(fromType), + 
getI32Imm(fromTypeWidth), Base, Offset, Chain }; + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8); + } else { if (Subtarget.is64Bit()) { switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_areg_64; break; - case MVT::i16: Opcode = NVPTX::LD_i16_areg_64; break; - case MVT::i32: Opcode = NVPTX::LD_i32_areg_64; break; - case MVT::i64: Opcode = NVPTX::LD_i64_areg_64; break; - case MVT::f32: Opcode = NVPTX::LD_f32_areg_64; break; - case MVT::f64: Opcode = NVPTX::LD_f64_areg_64; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::LD_i8_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_areg_64; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_areg_64; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_areg_64; + break; + default: + return NULL; } } else { switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_areg; break; - case MVT::i16: Opcode = NVPTX::LD_i16_areg; break; - case MVT::i32: Opcode = NVPTX::LD_i32_areg; break; - case MVT::i64: Opcode = NVPTX::LD_i64_areg; break; - case MVT::f32: Opcode = NVPTX::LD_f32_areg; break; - case MVT::f64: Opcode = NVPTX::LD_f64_areg; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::LD_i8_areg; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_areg; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_areg; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_areg; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_areg; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_areg; + break; + default: + return NULL; } } - SDValue Ops[] = { getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(fromType), - getI32Imm(fromTypeWidth), - N1, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, - MVT::Other, Ops, 7); + SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), 
getI32Imm(fromType), + getI32Imm(fromTypeWidth), N1, Chain }; + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7); } if (NVPTXLD != NULL) { @@ -344,9 +399,8 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { MemSDNode *MemSD = cast<MemSDNode>(N); EVT LoadedVT = MemSD->getMemoryVT(); - if (!LoadedVT.isSimple()) - return NULL; + return NULL; // Address Space Setting unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget); @@ -369,11 +423,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { // type is integer // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float MVT ScalarVT = SimpleVT.getScalarType(); - unsigned FromTypeWidth = ScalarVT.getSizeInBits(); + unsigned FromTypeWidth = ScalarVT.getSizeInBits(); unsigned int FromType; // The last operand holds the original LoadSDNode::getExtensionType() value - unsigned ExtensionType = - cast<ConstantSDNode>(N->getOperand(N->getNumOperands()-1))->getZExtValue(); + unsigned ExtensionType = cast<ConstantSDNode>( + N->getOperand(N->getNumOperands() - 1))->getZExtValue(); if (ExtensionType == ISD::SEXTLOAD) FromType = NVPTX::PTXLdStInstCode::Signed; else if (ScalarVT.isFloatingPoint()) @@ -384,197 +438,328 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { unsigned VecType; switch (N->getOpcode()) { - case NVPTXISD::LoadV2: VecType = NVPTX::PTXLdStInstCode::V2; break; - case NVPTXISD::LoadV4: VecType = NVPTX::PTXLdStInstCode::V4; break; - default: return NULL; + case NVPTXISD::LoadV2: + VecType = NVPTX::PTXLdStInstCode::V2; + break; + case NVPTXISD::LoadV4: + VecType = NVPTX::PTXLdStInstCode::V4; + break; + default: + return NULL; } EVT EltVT = N->getValueType(0); if (SelectDirectAddr(Op1, Addr)) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v2_avar; break; - case MVT::i16: Opcode = 
NVPTX::LDV_i16_v2_avar; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v2_avar; break; - case MVT::i64: Opcode = NVPTX::LDV_i64_v2_avar; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v2_avar; break; - case MVT::f64: Opcode = NVPTX::LDV_f64_v2_avar; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_avar; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_avar; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_avar; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_avar; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_avar; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_avar; + break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v4_avar; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v4_avar; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v4_avar; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v4_avar; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_avar; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_avar; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_avar; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_avar; + break; } break; } - SDValue Ops[] = { getI32Imm(IsVolatile), - getI32Imm(CodeAddrSpace), - getI32Imm(VecType), - getI32Imm(FromType), - getI32Imm(FromTypeWidth), - Addr, Chain }; + SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), + getI32Imm(VecType), getI32Imm(FromType), + getI32Imm(FromTypeWidth), Addr, Chain }; LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7); - } else if (Subtarget.is64Bit()? - SelectADDRsi64(Op1.getNode(), Op1, Base, Offset): - SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { + } else if (Subtarget.is64Bit() + ? 
SelectADDRsi64(Op1.getNode(), Op1, Base, Offset) + : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v2_asi; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v2_asi; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v2_asi; break; - case MVT::i64: Opcode = NVPTX::LDV_i64_v2_asi; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v2_asi; break; - case MVT::f64: Opcode = NVPTX::LDV_f64_v2_asi; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_asi; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_asi; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_asi; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_asi; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_asi; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_asi; + break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v4_asi; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v4_asi; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v4_asi; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v4_asi; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_asi; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_asi; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_asi; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_asi; + break; } break; } - SDValue Ops[] = { getI32Imm(IsVolatile), - getI32Imm(CodeAddrSpace), - getI32Imm(VecType), - getI32Imm(FromType), - getI32Imm(FromTypeWidth), - Base, Offset, Chain }; + SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), + getI32Imm(VecType), getI32Imm(FromType), + getI32Imm(FromTypeWidth), Base, Offset, Chain }; LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8); - } else if 
(Subtarget.is64Bit()? - SelectADDRri64(Op1.getNode(), Op1, Base, Offset): - SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { + } else if (Subtarget.is64Bit() + ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) + : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari_64; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari_64; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari_64; break; - case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari_64; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari_64; break; - case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_ari_64; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_ari_64; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_ari_64; + break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari_64; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari_64; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari_64; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_ari_64; + break; } break; } } else { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; 
- case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari; break; - case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari; break; - case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_ari; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_ari; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_ari; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_ari; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_ari; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_ari; + break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_ari; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_ari; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_ari; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_ari; + break; } break; } } - SDValue Ops[] = { getI32Imm(IsVolatile), - getI32Imm(CodeAddrSpace), - getI32Imm(VecType), - getI32Imm(FromType), - getI32Imm(FromTypeWidth), - Base, Offset, Chain }; + SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), + getI32Imm(VecType), getI32Imm(FromType), + getI32Imm(FromTypeWidth), Base, Offset, Chain }; LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8); } else { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg_64; break; - case MVT::i16: Opcode = 
NVPTX::LDV_i16_v2_areg_64; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg_64; break; - case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg_64; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg_64; break; - case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_areg_64; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_areg_64; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_areg_64; + break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg_64; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg_64; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg_64; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_areg_64; + break; } break; } } else { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg; break; - case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg; break; - case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_areg; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_areg; + break; + case MVT::i32: + Opcode = 
NVPTX::LDV_i32_v2_areg; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_areg; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_areg; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_areg; + break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_areg; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_areg; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_areg; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_areg; + break; } break; } } - SDValue Ops[] = { getI32Imm(IsVolatile), - getI32Imm(CodeAddrSpace), - getI32Imm(VecType), - getI32Imm(FromType), - getI32Imm(FromTypeWidth), - Op1, Chain }; + SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), + getI32Imm(VecType), getI32Imm(FromType), + getI32Imm(FromTypeWidth), Op1, Chain }; LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7); } @@ -598,89 +783,179 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { // Select opcode if (Subtarget.is64Bit()) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LDGV2: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; break; - case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; break; - case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; + break; + case 
MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; + break; } break; case NVPTXISD::LDGV4: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; + break; } break; case NVPTXISD::LDUV2: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; break; - case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; break; - case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; + break; } break; 
case NVPTXISD::LDUV4: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; + break; } break; } } else { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LDGV2: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; break; - case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; break; - case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; + break; } break; case NVPTXISD::LDGV4: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; break; - case MVT::i32: Opcode = 
NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; + break; } break; case NVPTXISD::LDUV2: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; break; - case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; break; - case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; + break; } break; case NVPTXISD::LDUV4: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; + break; + case 
MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; + break; } break; } @@ -696,8 +971,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { return LD; } - -SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { +SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { DebugLoc dl = N->getDebugLoc(); StoreSDNode *ST = cast<StoreSDNode>(N); EVT StoreVT = ST->getMemoryVT(); @@ -738,7 +1012,7 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { // - for integer type, always use 'u' // MVT ScalarVT = SimpleVT.getScalarType(); - unsigned toTypeWidth = ScalarVT.getSizeInBits(); + unsigned toTypeWidth = ScalarVT.getSizeInBits(); unsigned int toType; if (ScalarVT.isFloatingPoint()) toType = NVPTX::PTXLdStInstCode::Float; @@ -757,108 +1031,166 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { if (SelectDirectAddr(N2, Addr)) { switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_avar; break; - case MVT::i16: Opcode = NVPTX::ST_i16_avar; break; - case MVT::i32: Opcode = NVPTX::ST_i32_avar; break; - case MVT::i64: Opcode = NVPTX::ST_i64_avar; break; - case MVT::f32: Opcode = NVPTX::ST_f32_avar; break; - case MVT::f64: Opcode = NVPTX::ST_f64_avar; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::ST_i8_avar; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_avar; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_avar; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_avar; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_avar; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_avar; + break; + default: + return NULL; } - SDValue Ops[] = { N1, - getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(toType), - getI32Imm(toTypeWidth), - Addr, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, - MVT::Other, Ops, 8); - } else if (Subtarget.is64Bit()? 
- SelectADDRsi64(N2.getNode(), N2, Base, Offset): - SelectADDRsi(N2.getNode(), N2, Base, Offset)) { + SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(toType), + getI32Imm(toTypeWidth), Addr, Chain }; + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8); + } else if (Subtarget.is64Bit() + ? SelectADDRsi64(N2.getNode(), N2, Base, Offset) + : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_asi; break; - case MVT::i16: Opcode = NVPTX::ST_i16_asi; break; - case MVT::i32: Opcode = NVPTX::ST_i32_asi; break; - case MVT::i64: Opcode = NVPTX::ST_i64_asi; break; - case MVT::f32: Opcode = NVPTX::ST_f32_asi; break; - case MVT::f64: Opcode = NVPTX::ST_f64_asi; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::ST_i8_asi; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_asi; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_asi; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_asi; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_asi; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_asi; + break; + default: + return NULL; } - SDValue Ops[] = { N1, - getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(toType), - getI32Imm(toTypeWidth), - Base, Offset, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, - MVT::Other, Ops, 9); - } else if (Subtarget.is64Bit()? - SelectADDRri64(N2.getNode(), N2, Base, Offset): - SelectADDRri(N2.getNode(), N2, Base, Offset)) { + SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(toType), + getI32Imm(toTypeWidth), Base, Offset, Chain }; + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9); + } else if (Subtarget.is64Bit() + ? 
SelectADDRri64(N2.getNode(), N2, Base, Offset) + : SelectADDRri(N2.getNode(), N2, Base, Offset)) { if (Subtarget.is64Bit()) { switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_ari_64; break; - case MVT::i16: Opcode = NVPTX::ST_i16_ari_64; break; - case MVT::i32: Opcode = NVPTX::ST_i32_ari_64; break; - case MVT::i64: Opcode = NVPTX::ST_i64_ari_64; break; - case MVT::f32: Opcode = NVPTX::ST_f32_ari_64; break; - case MVT::f64: Opcode = NVPTX::ST_f64_ari_64; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::ST_i8_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_ari_64; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_ari_64; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_ari_64; + break; + default: + return NULL; } } else { switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_ari; break; - case MVT::i16: Opcode = NVPTX::ST_i16_ari; break; - case MVT::i32: Opcode = NVPTX::ST_i32_ari; break; - case MVT::i64: Opcode = NVPTX::ST_i64_ari; break; - case MVT::f32: Opcode = NVPTX::ST_f32_ari; break; - case MVT::f64: Opcode = NVPTX::ST_f64_ari; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::ST_i8_ari; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_ari; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_ari; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_ari; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_ari; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_ari; + break; + default: + return NULL; } } - SDValue Ops[] = { N1, - getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(toType), - getI32Imm(toTypeWidth), - Base, Offset, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, - MVT::Other, Ops, 9); + SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(toType), + getI32Imm(toTypeWidth), Base, Offset, 
Chain }; + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9); } else { if (Subtarget.is64Bit()) { switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_areg_64; break; - case MVT::i16: Opcode = NVPTX::ST_i16_areg_64; break; - case MVT::i32: Opcode = NVPTX::ST_i32_areg_64; break; - case MVT::i64: Opcode = NVPTX::ST_i64_areg_64; break; - case MVT::f32: Opcode = NVPTX::ST_f32_areg_64; break; - case MVT::f64: Opcode = NVPTX::ST_f64_areg_64; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::ST_i8_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_areg_64; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_areg_64; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_areg_64; + break; + default: + return NULL; } } else { switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_areg; break; - case MVT::i16: Opcode = NVPTX::ST_i16_areg; break; - case MVT::i32: Opcode = NVPTX::ST_i32_areg; break; - case MVT::i64: Opcode = NVPTX::ST_i64_areg; break; - case MVT::f32: Opcode = NVPTX::ST_f32_areg; break; - case MVT::f64: Opcode = NVPTX::ST_f64_areg; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::ST_i8_areg; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_areg; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_areg; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_areg; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_areg; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_areg; + break; + default: + return NULL; } } - SDValue Ops[] = { N1, - getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(toType), - getI32Imm(toTypeWidth), - N2, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, - MVT::Other, Ops, 8); + SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(toType), + getI32Imm(toTypeWidth), N2, Chain }; 
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8); } if (NVPTXST != NULL) { @@ -901,14 +1233,13 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { // - for integer type, always use 'u' assert(StoreVT.isSimple() && "Store value is not simple"); MVT ScalarVT = StoreVT.getSimpleVT().getScalarType(); - unsigned ToTypeWidth = ScalarVT.getSizeInBits(); + unsigned ToTypeWidth = ScalarVT.getSizeInBits(); unsigned ToType; if (ScalarVT.isFloatingPoint()) ToType = NVPTX::PTXLdStInstCode::Float; else ToType = NVPTX::PTXLdStInstCode::Unsigned; - SmallVector<SDValue, 12> StOps; SDValue N2; unsigned VecType; @@ -928,7 +1259,8 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { StOps.push_back(N->getOperand(4)); N2 = N->getOperand(5); break; - default: return NULL; + default: + return NULL; } StOps.push_back(getI32Imm(IsVolatile)); @@ -939,105 +1271,197 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { if (SelectDirectAddr(N2, Addr)) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v2_avar; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v2_avar; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v2_avar; break; - case MVT::i64: Opcode = NVPTX::STV_i64_v2_avar; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v2_avar; break; - case MVT::f64: Opcode = NVPTX::STV_f64_v2_avar; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_avar; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_avar; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_avar; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_avar; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_avar; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_avar; + break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = 
NVPTX::STV_i8_v4_avar; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v4_avar; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v4_avar; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v4_avar; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_avar; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_avar; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_avar; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_avar; + break; } break; } StOps.push_back(Addr); - } else if (Subtarget.is64Bit()? - SelectADDRsi64(N2.getNode(), N2, Base, Offset): - SelectADDRsi(N2.getNode(), N2, Base, Offset)) { + } else if (Subtarget.is64Bit() + ? SelectADDRsi64(N2.getNode(), N2, Base, Offset) + : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v2_asi; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v2_asi; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v2_asi; break; - case MVT::i64: Opcode = NVPTX::STV_i64_v2_asi; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v2_asi; break; - case MVT::f64: Opcode = NVPTX::STV_f64_v2_asi; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_asi; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_asi; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_asi; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_asi; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_asi; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_asi; + break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v4_asi; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v4_asi; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v4_asi; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v4_asi; break; + default: + return NULL; 
+ case MVT::i8: + Opcode = NVPTX::STV_i8_v4_asi; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_asi; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_asi; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_asi; + break; } break; } StOps.push_back(Base); StOps.push_back(Offset); - } else if (Subtarget.is64Bit()? - SelectADDRri64(N2.getNode(), N2, Base, Offset): - SelectADDRri(N2.getNode(), N2, Base, Offset)) { + } else if (Subtarget.is64Bit() + ? SelectADDRri64(N2.getNode(), N2, Base, Offset) + : SelectADDRri(N2.getNode(), N2, Base, Offset)) { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari_64; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari_64; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari_64; break; - case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari_64; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari_64; break; - case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_ari_64; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_ari_64; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_ari_64; + break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari_64; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari_64; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari_64; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_ari_64; + break; + case 
MVT::i32: + Opcode = NVPTX::STV_i32_v4_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_ari_64; + break; } break; } } else { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari; break; - case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari; break; - case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_ari; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_ari; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_ari; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_ari; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_ari; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_ari; + break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_ari; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_ari; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_ari; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_ari; + break; } break; } @@ -1047,49 +1471,95 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { } else { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg_64; break; - case MVT::i16: Opcode = 
NVPTX::STV_i16_v2_areg_64; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg_64; break; - case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg_64; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg_64; break; - case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_areg_64; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_areg_64; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_areg_64; + break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg_64; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg_64; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg_64; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_areg_64; + break; } break; } } else { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg; break; - case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg; break; - case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_areg; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_areg; + break; + case MVT::i32: + Opcode = 
NVPTX::STV_i32_v2_areg; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_areg; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_areg; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_areg; + break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_areg; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_areg; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_areg; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_areg; + break; } break; } @@ -1112,8 +1582,8 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { // A direct address could be a globaladdress or externalsymbol. bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { // Return true if TGA or ES. 
- if (N.getOpcode() == ISD::TargetGlobalAddress - || N.getOpcode() == ISD::TargetExternalSymbol) { + if (N.getOpcode() == ISD::TargetGlobalAddress || + N.getOpcode() == ISD::TargetExternalSymbol) { Address = N; return true; } @@ -1131,12 +1601,11 @@ bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { } // symbol+offset -bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr, - SDValue &Base, SDValue &Offset, - MVT mvt) { +bool NVPTXDAGToDAGISel::SelectADDRsi_imp( + SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) { if (Addr.getOpcode() == ISD::ADD) { if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { - SDValue base=Addr.getOperand(0); + SDValue base = Addr.getOperand(0); if (SelectDirectAddr(base, Base)) { Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt); return true; @@ -1159,9 +1628,8 @@ bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr, } // register+offset -bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr, - SDValue &Base, SDValue &Offset, - MVT mvt) { +bool NVPTXDAGToDAGISel::SelectADDRri_imp( + SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) { if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); Offset = CurDAG->getTargetConstant(0, mvt); @@ -1169,7 +1637,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr, } if (Addr.getOpcode() == ISD::TargetExternalSymbol || Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // direct calls. + return false; // direct calls. 
if (Addr.getOpcode() == ISD::ADD) { if (SelectDirectAddr(Addr.getOperand(0), Addr)) { @@ -1177,7 +1645,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr, } if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { if (FrameIndexSDNode *FIN = - dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) + dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) // Constant offset from frame ref. Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); else @@ -1209,8 +1677,7 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, // (See SelectionDAGNodes.h). So we need to check for both. if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) { Src = mN->getSrcValue(); - } - else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) { + } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) { Src = mN->getSrcValue(); } if (!Src) @@ -1222,13 +1689,13 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. -bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector<SDValue> &OutOps) { +bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand( + const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) { SDValue Op0, Op1; switch (ConstraintCode) { - default: return true; - case 'm': // memory + default: + return true; + case 'm': // memory if (SelectDirectAddr(Op, Op0)) { OutOps.push_back(Op0); OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); @@ -1251,10 +1718,8 @@ bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, // pattern matcher inserts a bunch of IMOVi8rr to convert // the imm to i8imm, and this causes instruction selection // to fail. 
-bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, - SDValue &Retval) { - if (!(N.getOpcode() == ISD::UNDEF) && - !(N.getOpcode() == ISD::Constant)) +bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, SDValue &Retval) { + if (!(N.getOpcode() == ISD::UNDEF) && !(N.getOpcode() == ISD::Constant)) return false; if (N.getOpcode() == ISD::UNDEF) diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 4ec924117a..70e8e46429 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -64,11 +64,10 @@ public: const NVPTXSubtarget &Subtarget; - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector<SDValue> &OutOps); + virtual bool SelectInlineAsmMemoryOperand( + const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps); private: - // Include the pieces autogenerated from the target description. +// Include the pieces autogenerated from the target description. 
#include "NVPTXGenDAGISel.inc" SDNode *Select(SDNode *N); @@ -99,7 +98,6 @@ private: bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset); - bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const; bool UndefOrImm(SDValue Op, SDValue N, SDValue &Retval); diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index e9a9fbfd04..6e01a5a820 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "NVPTXISelLowering.h" #include "NVPTX.h" #include "NVPTXTargetMachine.h" @@ -44,14 +43,14 @@ using namespace llvm; static unsigned int uniqueCallSite = 0; -static cl::opt<bool> -sched4reg("nvptx-sched4reg", - cl::desc("NVPTX Specific: schedule for register pressue"), - cl::init(false)); +static cl::opt<bool> sched4reg( + "nvptx-sched4reg", + cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)); static bool IsPTXVectorType(MVT VT) { switch (VT.SimpleTy) { - default: return false; + default: + return false; case MVT::v2i8: case MVT::v4i8: case MVT::v2i16: @@ -62,22 +61,21 @@ static bool IsPTXVectorType(MVT VT) { case MVT::v2f32: case MVT::v4f32: case MVT::v2f64: - return true; + return true; } } // NVPTXTargetLowering Constructor. NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) -: TargetLowering(TM, new NVPTXTargetObjectFile()), - nvTM(&TM), - nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { + : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM), + nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { // always lower memset, memcpy, and memmove intrinsics to load/store // instructions, rather // then generating calls to memset, mempcy or memmove. 
- MaxStoresPerMemset = (unsigned)0xFFFFFFFF; - MaxStoresPerMemcpy = (unsigned)0xFFFFFFFF; - MaxStoresPerMemmove = (unsigned)0xFFFFFFFF; + MaxStoresPerMemset = (unsigned) 0xFFFFFFFF; + MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF; + MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF; setBooleanContents(ZeroOrNegativeOneBooleanContent); @@ -100,52 +98,50 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); // Operations not directly supported by NVPTX. - setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); - setOperationAction(ISD::BR_CC, MVT::f32, Expand); - setOperationAction(ISD::BR_CC, MVT::f64, Expand); - setOperationAction(ISD::BR_CC, MVT::i1, Expand); - setOperationAction(ISD::BR_CC, MVT::i8, Expand); - setOperationAction(ISD::BR_CC, MVT::i16, Expand); - setOperationAction(ISD::BR_CC, MVT::i32, Expand); - setOperationAction(ISD::BR_CC, MVT::i64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::f32, Expand); + setOperationAction(ISD::BR_CC, MVT::f64, Expand); + setOperationAction(ISD::BR_CC, MVT::i1, Expand); + setOperationAction(ISD::BR_CC, MVT::i8, Expand); + setOperationAction(ISD::BR_CC, MVT::i16, Expand); + setOperationAction(ISD::BR_CC, MVT::i32, Expand); + setOperationAction(ISD::BR_CC, MVT::i64, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (nvptxSubtarget.hasROT64()) { - setOperationAction(ISD::ROTL , MVT::i64, Legal); - setOperationAction(ISD::ROTR , MVT::i64, Legal); - } - else { - setOperationAction(ISD::ROTL , MVT::i64, Expand); - 
setOperationAction(ISD::ROTR , MVT::i64, Expand); + setOperationAction(ISD::ROTL, MVT::i64, Legal); + setOperationAction(ISD::ROTR, MVT::i64, Legal); + } else { + setOperationAction(ISD::ROTL, MVT::i64, Expand); + setOperationAction(ISD::ROTR, MVT::i64, Expand); } if (nvptxSubtarget.hasROT32()) { - setOperationAction(ISD::ROTL , MVT::i32, Legal); - setOperationAction(ISD::ROTR , MVT::i32, Legal); - } - else { - setOperationAction(ISD::ROTL , MVT::i32, Expand); - setOperationAction(ISD::ROTR , MVT::i32, Expand); + setOperationAction(ISD::ROTL, MVT::i32, Legal); + setOperationAction(ISD::ROTR, MVT::i32, Legal); + } else { + setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTR, MVT::i32, Expand); } - setOperationAction(ISD::ROTL , MVT::i16, Expand); - setOperationAction(ISD::ROTR , MVT::i16, Expand); - setOperationAction(ISD::ROTL , MVT::i8, Expand); - setOperationAction(ISD::ROTR , MVT::i8, Expand); - setOperationAction(ISD::BSWAP , MVT::i16, Expand); - setOperationAction(ISD::BSWAP , MVT::i32, Expand); - setOperationAction(ISD::BSWAP , MVT::i64, Expand); + setOperationAction(ISD::ROTL, MVT::i16, Expand); + setOperationAction(ISD::ROTR, MVT::i16, Expand); + setOperationAction(ISD::ROTL, MVT::i8, Expand); + setOperationAction(ISD::ROTR, MVT::i8, Expand); + setOperationAction(ISD::BSWAP, MVT::i16, Expand); + setOperationAction(ISD::BSWAP, MVT::i32, Expand); + setOperationAction(ISD::BSWAP, MVT::i64, Expand); // Indirect branch is not supported. // This also disables Jump Table creation. 
- setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::BRIND, MVT::Other, Expand); + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BRIND, MVT::Other, Expand); - setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom); - setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); // We want to legalize constant related memmove and memcopy // intrinsics. @@ -168,16 +164,16 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) setTruncStoreAction(MVT::i8, MVT::i1, Expand); // This is legal in NVPTX - setOperationAction(ISD::ConstantFP, MVT::f64, Legal); - setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); // TRAP can be lowered to PTX trap - setOperationAction(ISD::TRAP, MVT::Other, Legal); + setOperationAction(ISD::TRAP, MVT::Other, Legal); // Register custom handling for vector loads/stores - for (int i = MVT::FIRST_VECTOR_VALUETYPE; - i <= MVT::LAST_VECTOR_VALUETYPE; ++i) { - MVT VT = (MVT::SimpleValueType)i; + for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE; + ++i) { + MVT VT = (MVT::SimpleValueType) i; if (IsPTXVectorType(VT)) { setOperationAction(ISD::LOAD, VT, Custom); setOperationAction(ISD::STORE, VT, Custom); @@ -190,49 +186,86 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) computeRegisterProperties(); } - const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - default: return 0; - case NVPTXISD::CALL: return "NVPTXISD::CALL"; - case NVPTXISD::RET_FLAG: return "NVPTXISD::RET_FLAG"; - case NVPTXISD::Wrapper: return "NVPTXISD::Wrapper"; - case NVPTXISD::NVBuiltin: return "NVPTXISD::NVBuiltin"; - case NVPTXISD::DeclareParam: return "NVPTXISD::DeclareParam"; + default: + 
return 0; + case NVPTXISD::CALL: + return "NVPTXISD::CALL"; + case NVPTXISD::RET_FLAG: + return "NVPTXISD::RET_FLAG"; + case NVPTXISD::Wrapper: + return "NVPTXISD::Wrapper"; + case NVPTXISD::NVBuiltin: + return "NVPTXISD::NVBuiltin"; + case NVPTXISD::DeclareParam: + return "NVPTXISD::DeclareParam"; case NVPTXISD::DeclareScalarParam: return "NVPTXISD::DeclareScalarParam"; - case NVPTXISD::DeclareRet: return "NVPTXISD::DeclareRet"; - case NVPTXISD::DeclareRetParam: return "NVPTXISD::DeclareRetParam"; - case NVPTXISD::PrintCall: return "NVPTXISD::PrintCall"; - case NVPTXISD::LoadParam: return "NVPTXISD::LoadParam"; - case NVPTXISD::StoreParam: return "NVPTXISD::StoreParam"; - case NVPTXISD::StoreParamS32: return "NVPTXISD::StoreParamS32"; - case NVPTXISD::StoreParamU32: return "NVPTXISD::StoreParamU32"; - case NVPTXISD::MoveToParam: return "NVPTXISD::MoveToParam"; - case NVPTXISD::CallArgBegin: return "NVPTXISD::CallArgBegin"; - case NVPTXISD::CallArg: return "NVPTXISD::CallArg"; - case NVPTXISD::LastCallArg: return "NVPTXISD::LastCallArg"; - case NVPTXISD::CallArgEnd: return "NVPTXISD::CallArgEnd"; - case NVPTXISD::CallVoid: return "NVPTXISD::CallVoid"; - case NVPTXISD::CallVal: return "NVPTXISD::CallVal"; - case NVPTXISD::CallSymbol: return "NVPTXISD::CallSymbol"; - case NVPTXISD::Prototype: return "NVPTXISD::Prototype"; - case NVPTXISD::MoveParam: return "NVPTXISD::MoveParam"; - case NVPTXISD::MoveRetval: return "NVPTXISD::MoveRetval"; - case NVPTXISD::MoveToRetval: return "NVPTXISD::MoveToRetval"; - case NVPTXISD::StoreRetval: return "NVPTXISD::StoreRetval"; - case NVPTXISD::PseudoUseParam: return "NVPTXISD::PseudoUseParam"; - case NVPTXISD::RETURN: return "NVPTXISD::RETURN"; - case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin"; - case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd"; - case NVPTXISD::LoadV2: return "NVPTXISD::LoadV2"; - case NVPTXISD::LoadV4: return "NVPTXISD::LoadV4"; - case NVPTXISD::LDGV2: return "NVPTXISD::LDGV2"; - case 
NVPTXISD::LDGV4: return "NVPTXISD::LDGV4"; - case NVPTXISD::LDUV2: return "NVPTXISD::LDUV2"; - case NVPTXISD::LDUV4: return "NVPTXISD::LDUV4"; - case NVPTXISD::StoreV2: return "NVPTXISD::StoreV2"; - case NVPTXISD::StoreV4: return "NVPTXISD::StoreV4"; + case NVPTXISD::DeclareRet: + return "NVPTXISD::DeclareRet"; + case NVPTXISD::DeclareRetParam: + return "NVPTXISD::DeclareRetParam"; + case NVPTXISD::PrintCall: + return "NVPTXISD::PrintCall"; + case NVPTXISD::LoadParam: + return "NVPTXISD::LoadParam"; + case NVPTXISD::StoreParam: + return "NVPTXISD::StoreParam"; + case NVPTXISD::StoreParamS32: + return "NVPTXISD::StoreParamS32"; + case NVPTXISD::StoreParamU32: + return "NVPTXISD::StoreParamU32"; + case NVPTXISD::MoveToParam: + return "NVPTXISD::MoveToParam"; + case NVPTXISD::CallArgBegin: + return "NVPTXISD::CallArgBegin"; + case NVPTXISD::CallArg: + return "NVPTXISD::CallArg"; + case NVPTXISD::LastCallArg: + return "NVPTXISD::LastCallArg"; + case NVPTXISD::CallArgEnd: + return "NVPTXISD::CallArgEnd"; + case NVPTXISD::CallVoid: + return "NVPTXISD::CallVoid"; + case NVPTXISD::CallVal: + return "NVPTXISD::CallVal"; + case NVPTXISD::CallSymbol: + return "NVPTXISD::CallSymbol"; + case NVPTXISD::Prototype: + return "NVPTXISD::Prototype"; + case NVPTXISD::MoveParam: + return "NVPTXISD::MoveParam"; + case NVPTXISD::MoveRetval: + return "NVPTXISD::MoveRetval"; + case NVPTXISD::MoveToRetval: + return "NVPTXISD::MoveToRetval"; + case NVPTXISD::StoreRetval: + return "NVPTXISD::StoreRetval"; + case NVPTXISD::PseudoUseParam: + return "NVPTXISD::PseudoUseParam"; + case NVPTXISD::RETURN: + return "NVPTXISD::RETURN"; + case NVPTXISD::CallSeqBegin: + return "NVPTXISD::CallSeqBegin"; + case NVPTXISD::CallSeqEnd: + return "NVPTXISD::CallSeqEnd"; + case NVPTXISD::LoadV2: + return "NVPTXISD::LoadV2"; + case NVPTXISD::LoadV4: + return "NVPTXISD::LoadV4"; + case NVPTXISD::LDGV2: + return "NVPTXISD::LDGV2"; + case NVPTXISD::LDGV4: + return "NVPTXISD::LDGV4"; + case NVPTXISD::LDUV2: + return 
"NVPTXISD::LDUV2"; + case NVPTXISD::LDUV4: + return "NVPTXISD::LDUV4"; + case NVPTXISD::StoreV2: + return "NVPTXISD::StoreV2"; + case NVPTXISD::StoreV4: + return "NVPTXISD::StoreV4"; } } @@ -248,10 +281,9 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op); } -std::string NVPTXTargetLowering::getPrototype(Type *retTy, - const ArgListTy &Args, - const SmallVectorImpl<ISD::OutputArg> &Outs, - unsigned retAlignment) const { +std::string NVPTXTargetLowering::getPrototype( + Type *retTy, const ArgListTy &Args, + const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment) const { bool isABI = (nvptxSubtarget.getSmVersion() >= 20); @@ -267,54 +299,47 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, unsigned size = 0; if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) { size = ITy->getBitWidth(); - if (size < 32) size = 32; - } - else { + if (size < 32) + size = 32; + } else { assert(retTy->isFloatingPointTy() && "Floating point type expected here"); size = retTy->getPrimitiveSizeInBits(); } O << ".param .b" << size << " _"; - } - else if (isa<PointerType>(retTy)) - O << ".param .b" << getPointerTy().getSizeInBits() - << " _"; + } else if (isa<PointerType>(retTy)) + O << ".param .b" << getPointerTy().getSizeInBits() << " _"; else { if ((retTy->getTypeID() == Type::StructTyID) || isa<VectorType>(retTy)) { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*this, retTy, vtparts); unsigned totalsz = 0; - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { elems = vtparts[i].getVectorNumElements(); elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0, je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 8)) sz = 8; - 
totalsz += sz/8; + if (elemtype.isInteger() && (sz < 8)) + sz = 8; + totalsz += sz / 8; } } - O << ".param .align " - << retAlignment - << " .b8 _[" - << totalsz << "]"; - } - else { - assert(false && - "Unknown return type"); + O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]"; + } else { + assert(false && "Unknown return type"); } } - } - else { + } else { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*this, retTy, vtparts); unsigned idx = 0; - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { @@ -322,14 +347,16 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0, je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) sz = 32; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; O << ".reg .b" << sz << " _"; - if (j<je-1) O << ", "; + if (j < je - 1) + O << ", "; ++idx; } - if (i < e-1) + if (i < e - 1) O << ", "; } } @@ -340,7 +367,7 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, bool first = true; MVT thePointerTy = getPointerTy(); - for (unsigned i=0,e=Args.size(); i!=e; ++i) { + for (unsigned i = 0, e = Args.size(); i != e; ++i) { const Type *Ty = Args[i].Ty; if (!first) { O << ", "; @@ -351,9 +378,9 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, unsigned sz = 0; if (isa<IntegerType>(Ty)) { sz = cast<IntegerType>(Ty)->getBitWidth(); - if (sz < 32) sz = 32; - } - else if (isa<PointerType>(Ty)) + if (sz < 32) + sz = 32; + } else if (isa<PointerType>(Ty)) sz = thePointerTy.getSizeInBits(); else sz = Ty->getPrimitiveSizeInBits(); @@ -365,23 +392,20 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, continue; } const PointerType *PTy = dyn_cast<PointerType>(Ty); - assert(PTy && - "Param with byval 
attribute should be a pointer type"); + assert(PTy && "Param with byval attribute should be a pointer type"); Type *ETy = PTy->getElementType(); if (isABI) { unsigned align = Outs[i].Flags.getByValAlign(); unsigned sz = getDataLayout()->getTypeAllocSize(ETy); - O << ".param .align " << align - << " .b8 "; + O << ".param .align " << align << " .b8 "; O << "_"; O << "[" << sz << "]"; continue; - } - else { + } else { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*this, ETy, vtparts); - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { @@ -389,14 +413,16 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0,je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) sz = 32; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; O << ".reg .b" << sz << " "; O << "_"; - if (j<je-1) O << ", "; + if (j < je - 1) + O << ", "; } - if (i<e-1) + if (i < e - 1) O << ", "; } continue; @@ -406,27 +432,25 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, return O.str(); } - -SDValue -NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl<SDValue> &InVals) const { - SelectionDAG &DAG = CLI.DAG; - DebugLoc &dl = CLI.DL; +SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc &dl = CLI.DL; SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; - SmallVector<SDValue, 32> &OutVals = CLI.OutVals; - SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; - SDValue Chain = CLI.Chain; - SDValue Callee = CLI.Callee; - bool &isTailCall = CLI.IsTailCall; - ArgListTy &Args = CLI.Args; - Type *retTy = CLI.RetTy; - ImmutableCallSite *CS = CLI.CS; + 
SmallVector<SDValue, 32> &OutVals = CLI.OutVals; + SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &isTailCall = CLI.IsTailCall; + ArgListTy &Args = CLI.Args; + Type *retTy = CLI.RetTy; + ImmutableCallSite *CS = CLI.CS; bool isABI = (nvptxSubtarget.getSmVersion() >= 20); SDValue tempChain = Chain; - Chain = DAG.getCALLSEQ_START(Chain, - DAG.getIntPtrConstant(uniqueCallSite, true)); + Chain = + DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true)); SDValue InFlag = Chain.getValue(1); assert((Outs.size() == Args.size()) && @@ -434,7 +458,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned paramCount = 0; // Declare the .params or .reg need to pass values // to the function - for (unsigned i=0, e=Outs.size(); i!=e; ++i) { + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { EVT VT = Outs[i].VT; if (Outs[i].Flags.isByVal() == false) { @@ -445,19 +469,20 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (isABI) isReg = 0; unsigned sz = VT.getSizeInBits(); - if (VT.isInteger() && (sz < 32)) sz = 32; + if (VT.isInteger() && (sz < 32)) + sz = 32; SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32), - DAG.getConstant(isReg, MVT::i32), - InFlag }; + DAG.getConstant(isReg, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, DeclareParamOps, 5); InFlag = Chain.getValue(1); SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), - DAG.getConstant(0, MVT::i32), OutVals[i], InFlag }; + DAG.getConstant(0, MVT::i32), OutVals[i], + InFlag }; unsigned opcode = NVPTXISD::StoreParam; if (isReg) @@ -477,8 +502,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // struct or vector 
SmallVector<EVT, 16> vtparts; const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty); - assert(PTy && - "Type of a byval parameter should be pointer"); + assert(PTy && "Type of a byval parameter should be pointer"); ComputeValueVTs(*this, PTy->getElementType(), vtparts); if (isABI) { @@ -488,40 +512,41 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // The ByValAlign in the Outs[i].Flags is alway set at this point, so we // don't need to // worry about natural alignment or not. See TargetLowering::LowerCallTo() - SDValue DeclareParamOps[] = { Chain, - DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32), - DAG.getConstant(paramCount, MVT::i32), - DAG.getConstant(sz, MVT::i32), - InFlag }; + SDValue DeclareParamOps[] = { + Chain, DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32), + DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32), + InFlag + }; Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, DeclareParamOps, 5); InFlag = Chain.getValue(1); unsigned curOffset = 0; - for (unsigned j=0,je=vtparts.size(); j!=je; ++j) { + for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { unsigned elems = 1; EVT elemtype = vtparts[j]; if (vtparts[j].isVector()) { elems = vtparts[j].getVectorNumElements(); elemtype = vtparts[j].getVectorElementType(); } - for (unsigned k=0,ke=elems; k!=ke; ++k) { + for (unsigned k = 0, ke = elems; k != ke; ++k) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 8)) sz = 8; - SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), - OutVals[i], - DAG.getConstant(curOffset, - getPointerTy())); - SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, - MachinePointerInfo(), false, false, false, 0); + if (elemtype.isInteger() && (sz < 8)) + sz = 8; + SDValue srcAddr = + DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i], + DAG.getConstant(curOffset, getPointerTy())); + SDValue theVal = + DAG.getLoad(elemtype, dl, tempChain, srcAddr, + 
MachinePointerInfo(), false, false, false, 0); SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, - MVT::i32), - DAG.getConstant(curOffset, MVT::i32), - theVal, InFlag }; + SDValue CopyParamOps[] = { Chain, + DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(curOffset, MVT::i32), + theVal, InFlag }; Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs, CopyParamOps, 5); InFlag = Chain.getValue(1); - curOffset += sz/8; + curOffset += sz / 8; } } ++paramCount; @@ -530,30 +555,31 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Non-abi, struct or vector // Declare a bunch or .reg .b<size> .param<n> unsigned curOffset = 0; - for (unsigned j=0,je=vtparts.size(); j!=je; ++j) { + for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { unsigned elems = 1; EVT elemtype = vtparts[j]; if (vtparts[j].isVector()) { elems = vtparts[j].getVectorNumElements(); elemtype = vtparts[j].getVectorElementType(); } - for (unsigned k=0,ke=elems; k!=ke; ++k) { + for (unsigned k = 0, ke = elems; k != ke; ++k) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) sz = 32; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount, - MVT::i32), - DAG.getConstant(sz, MVT::i32), - DAG.getConstant(1, MVT::i32), - InFlag }; + SDValue DeclareParamOps[] = { Chain, + DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(sz, MVT::i32), + DAG.getConstant(1, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, DeclareParamOps, 5); InFlag = Chain.getValue(1); - SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i], - DAG.getConstant(curOffset, - getPointerTy())); - SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, - MachinePointerInfo(), false, false, false, 0); + 
SDValue srcAddr = + DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i], + DAG.getConstant(curOffset, getPointerTy())); + SDValue theVal = + DAG.getLoad(elemtype, dl, tempChain, srcAddr, MachinePointerInfo(), + false, false, false, 0); SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(0, MVT::i32), theVal, @@ -578,20 +604,21 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Declare one .param .align 16 .b8 func_retval0[<size>] for ABI or // individual .reg .b<size> func_retval<0..> for non ABI unsigned resultsz = 0; - for (unsigned i=0,e=resvtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = resvtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = resvtparts[i]; if (resvtparts[i].isVector()) { elems = resvtparts[i].getVectorNumElements(); elemtype = resvtparts[i].getVectorElementType(); } - for (unsigned j=0,je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); if (isABI == false) { - if (elemtype.isInteger() && (sz < 32)) sz = 32; - } - else { - if (elemtype.isInteger() && (sz < 8)) sz = 8; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; + } else { + if (elemtype.isInteger() && (sz < 8)) + sz = 8; } if (isABI == false) { SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); @@ -609,7 +636,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } if (isABI) { if (retTy->isPrimitiveType() || retTy->isIntegerTy() || - retTy->isPointerTy() ) { + retTy->isPointerTy()) { // Scalar needs to be at least 32bit wide if (resultsz < 32) resultsz = 32; @@ -620,8 +647,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, DeclareRetOps, 5); InFlag = Chain.getValue(1); - } - else { + } else { if (Func) { // direct call if (!llvm::getAlign(*(CS->getCalledFunction()), 
0, retAlignment)) retAlignment = getDataLayout()->getABITypeAlignment(retTy); @@ -631,10 +657,10 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, retAlignment = getDataLayout()->getABITypeAlignment(retTy); } SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue DeclareRetOps[] = { Chain, DAG.getConstant(retAlignment, - MVT::i32), - DAG.getConstant(resultsz/8, MVT::i32), - DAG.getConstant(0, MVT::i32), InFlag }; + SDValue DeclareRetOps[] = { Chain, + DAG.getConstant(retAlignment, MVT::i32), + DAG.getConstant(resultsz / 8, MVT::i32), + DAG.getConstant(0, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, DeclareRetOps, 5); InFlag = Chain.getValue(1); @@ -652,24 +678,24 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // INLINEASM SDNode. SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue); std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment); - const char *asmstr = nvTM->getManagedStrPool()-> - getManagedString(proto_string.c_str())->c_str(); - SDValue InlineAsmOps[] = { Chain, - DAG.getTargetExternalSymbol(asmstr, - getPointerTy()), - DAG.getMDNode(0), - DAG.getTargetConstant(0, MVT::i32), InFlag }; + const char *asmstr = nvTM->getManagedStrPool() + ->getManagedString(proto_string.c_str())->c_str(); + SDValue InlineAsmOps[] = { + Chain, DAG.getTargetExternalSymbol(asmstr, getPointerTy()), + DAG.getMDNode(0), DAG.getTargetConstant(0, MVT::i32), InFlag + }; Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5); InFlag = Chain.getValue(1); } // Op to just print "call" SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue PrintCallOps[] = { Chain, - DAG.getConstant(isABI ? ((Ins.size()==0) ? 
0 : 1) - : retCount, MVT::i32), - InFlag }; - Chain = DAG.getNode(Func?(NVPTXISD::PrintCallUni):(NVPTXISD::PrintCall), dl, - PrintCallVTs, PrintCallOps, 3); + SDValue PrintCallOps[] = { + Chain, + DAG.getConstant(isABI ? ((Ins.size() == 0) ? 0 : 1) : retCount, MVT::i32), + InFlag + }; + Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall), + dl, PrintCallVTs, PrintCallOps, 3); InFlag = Chain.getValue(1); // Ops to print out the function name @@ -685,31 +711,28 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CallArgBeginOps, 2); InFlag = Chain.getValue(1); - for (unsigned i=0, e=paramCount; i!=e; ++i) { + for (unsigned i = 0, e = paramCount; i != e; ++i) { unsigned opcode; - if (i==(e-1)) + if (i == (e - 1)) opcode = NVPTXISD::LastCallArg; else opcode = NVPTXISD::CallArg; SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32), - DAG.getConstant(i, MVT::i32), - InFlag }; + DAG.getConstant(i, MVT::i32), InFlag }; Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4); InFlag = Chain.getValue(1); } SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue CallArgEndOps[] = { Chain, - DAG.getConstant(Func ? 1 : 0, MVT::i32), + SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 
1 : 0, MVT::i32), InFlag }; - Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, - 3); + Chain = + DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, 3); InFlag = Chain.getValue(1); if (!Func) { SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue PrototypeOps[] = { Chain, - DAG.getConstant(uniqueCallSite, MVT::i32), + SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3); InFlag = Chain.getValue(1); @@ -719,32 +742,28 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (Ins.size() > 0) { if (isABI) { unsigned resoffset = 0; - for (unsigned i=0,e=Ins.size(); i!=e; ++i) { + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { unsigned sz = Ins[i].VT.getSizeInBits(); - if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8; + if (Ins[i].VT.isInteger() && (sz < 8)) + sz = 8; EVT LoadRetVTs[] = { Ins[i].VT, MVT::Other, MVT::Glue }; - SDValue LoadRetOps[] = { - Chain, - DAG.getConstant(1, MVT::i32), - DAG.getConstant(resoffset, MVT::i32), - InFlag - }; + SDValue LoadRetOps[] = { Chain, DAG.getConstant(1, MVT::i32), + DAG.getConstant(resoffset, MVT::i32), InFlag }; SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs, LoadRetOps, array_lengthof(LoadRetOps)); Chain = retval.getValue(1); InFlag = retval.getValue(2); InVals.push_back(retval); - resoffset += sz/8; + resoffset += sz / 8; } - } - else { + } else { SmallVector<EVT, 16> resvtparts; ComputeValueVTs(*this, retTy, resvtparts); assert(Ins.size() == resvtparts.size() && "Unexpected number of return values in non-ABI case"); unsigned paramNum = 0; - for (unsigned i=0,e=Ins.size(); i!=e; ++i) { + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { assert(EVT(Ins[i].VT) == resvtparts[i] && "Unexpected EVT type in non-ABI case"); unsigned numelems = 1; @@ -754,14 +773,11 @@ 
NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, elemtype = Ins[i].VT.getVectorElementType(); } std::vector<SDValue> tempRetVals; - for (unsigned j=0; j<numelems; ++j) { + for (unsigned j = 0; j < numelems; ++j) { EVT MoveRetVTs[] = { elemtype, MVT::Other, MVT::Glue }; - SDValue MoveRetOps[] = { - Chain, - DAG.getConstant(0, MVT::i32), - DAG.getConstant(paramNum, MVT::i32), - InFlag - }; + SDValue MoveRetOps[] = { Chain, DAG.getConstant(0, MVT::i32), + DAG.getConstant(paramNum, MVT::i32), + InFlag }; SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs, MoveRetOps, array_lengthof(MoveRetOps)); Chain = retval.getValue(1); @@ -777,9 +793,8 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } } } - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(uniqueCallSite, true), - DAG.getIntPtrConstant(uniqueCallSite+1, true), + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true), + DAG.getIntPtrConstant(uniqueCallSite + 1, true), InFlag); uniqueCallSite++; @@ -792,45 +807,51 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack() // (see LegalizeDAG.cpp). This is slow and uses local memory. 
// We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5 -SDValue NVPTXTargetLowering:: -LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { +SDValue +NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); SmallVector<SDValue, 8> Ops; unsigned NumOperands = Node->getNumOperands(); - for (unsigned i=0; i < NumOperands; ++i) { + for (unsigned i = 0; i < NumOperands; ++i) { SDValue SubOp = Node->getOperand(i); EVT VVT = SubOp.getNode()->getValueType(0); EVT EltVT = VVT.getVectorElementType(); unsigned NumSubElem = VVT.getVectorNumElements(); - for (unsigned j=0; j < NumSubElem; ++j) { + for (unsigned j = 0; j < NumSubElem; ++j) { Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, DAG.getIntPtrConstant(j))); } } - return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), - &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Ops[0], + Ops.size()); } -SDValue NVPTXTargetLowering:: -LowerOperation(SDValue Op, SelectionDAG &DAG) const { +SDValue +NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { - case ISD::RETURNADDR: return SDValue(); - case ISD::FRAMEADDR: return SDValue(); - case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); - case ISD::INTRINSIC_W_CHAIN: return Op; + case ISD::RETURNADDR: + return SDValue(); + case ISD::FRAMEADDR: + return SDValue(); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: + return Op; case ISD::BUILD_VECTOR: case ISD::EXTRACT_SUBVECTOR: return Op; - case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); - case ISD::STORE: return LowerSTORE(Op, DAG); - case ISD::LOAD: return LowerLOAD(Op, DAG); + case ISD::CONCAT_VECTORS: + return LowerCONCAT_VECTORS(Op, DAG); + case ISD::STORE: + return LowerSTORE(Op, DAG); + case ISD::LOAD: + return LowerLOAD(Op, 
DAG); default: llvm_unreachable("Custom lowering not defined for operation"); } } - SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { if (Op.getValueType() == MVT::i1) return LowerLOADi1(Op, DAG); @@ -842,24 +863,22 @@ SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { // => // v1 = ld i8* addr // v = trunc v1 to i1 -SDValue NVPTXTargetLowering:: -LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { +SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); LoadSDNode *LD = cast<LoadSDNode>(Node); DebugLoc dl = Node->getDebugLoc(); - assert(LD->getExtensionType() == ISD::NON_EXTLOAD) ; + assert(LD->getExtensionType() == ISD::NON_EXTLOAD); assert(Node->getValueType(0) == MVT::i1 && "Custom lowering for i1 load only"); - SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), - LD->getAlignment()); + SDValue newLD = + DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), LD->getAlignment()); SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); // The legalizer (the caller) is expecting two values from the legalized // load, so we build a MergeValues node for it. See ExpandUnalignedLoad() // in LegalizeDAG.cpp which also uses MergeValues. 
- SDValue Ops[] = {result, LD->getChain()}; + SDValue Ops[] = { result, LD->getChain() }; return DAG.getMergeValues(Ops, 2, dl); } @@ -887,7 +906,8 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { if (!ValVT.isSimple()) return SDValue(); switch (ValVT.getSimpleVT().SimpleTy) { - default: return SDValue(); + default: + return SDValue(); case MVT::v2i8: case MVT::v2i16: case MVT::v2i32: @@ -914,7 +934,8 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { NeedExt = true; switch (NumElts) { - default: return SDValue(); + default: + return SDValue(); case 2: Opcode = NVPTXISD::StoreV2; break; @@ -947,11 +968,9 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { MemSDNode *MemSD = cast<MemSDNode>(N); - SDValue NewSt = DAG.getMemIntrinsicNode(Opcode, DL, - DAG.getVTList(MVT::Other), &Ops[0], - Ops.size(), MemSD->getMemoryVT(), - MemSD->getMemOperand()); - + SDValue NewSt = DAG.getMemIntrinsicNode( + Opcode, DL, DAG.getVTList(MVT::Other), &Ops[0], Ops.size(), + MemSD->getMemoryVT(), MemSD->getMemOperand()); //return DCI.CombineTo(N, NewSt, true); return NewSt; @@ -964,8 +983,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { // => // v1 = zxt v to i8 // st i8, addr -SDValue NVPTXTargetLowering:: -LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { +SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); StoreSDNode *ST = cast<StoreSDNode>(Node); @@ -976,18 +994,14 @@ LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, - MVT::i8, Tmp3); - SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, - ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); + Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Tmp3); 
+ SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); return Result; } - -SDValue -NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx, - EVT v) const { +SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, + int idx, EVT v) const { std::string *name = nvTM->getManagedStrPool()->getManagedString(inname); std::stringstream suffix; suffix << idx; @@ -1000,19 +1014,16 @@ NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { return getExtSymb(DAG, ".PARAM", idx, v); } -SDValue -NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) { +SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) { return getExtSymb(DAG, ".HLPPARAM", idx); } // Check to see if the kernel argument is image*_t or sampler_t bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { - static const char *const specialTypes[] = { - "struct._image2d_t", - "struct._image3d_t", - "struct._sampler_t" - }; + static const char *const specialTypes[] = { "struct._image2d_t", + "struct._image3d_t", + "struct._sampler_t" }; const Type *Ty = arg->getType(); const PointerType *PTy = dyn_cast<PointerType>(Ty); @@ -1033,12 +1044,10 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { return false; } -SDValue -NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { +SDValue NVPTXTargetLowering::LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); const DataLayout *TD = getDataLayout(); @@ -1054,34 +1063,43 @@ 
NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, std::vector<Type *> argTypes; std::vector<const Argument *> theArgs; for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) { + I != E; ++I) { theArgs.push_back(I); argTypes.push_back(I->getType()); } - assert(argTypes.size() == Ins.size() && - "Ins types and function types did not match"); + //assert(argTypes.size() == Ins.size() && + // "Ins types and function types did not match"); int idx = 0; - for (unsigned i=0, e=Ins.size(); i!=e; ++i, ++idx) { + for (unsigned i = 0, e = argTypes.size(); i != e; ++i, ++idx) { Type *Ty = argTypes[i]; EVT ObjectVT = getValueType(Ty); - assert(ObjectVT == Ins[i].VT && - "Ins type did not match function type"); + //assert(ObjectVT == Ins[i].VT && + // "Ins type did not match function type"); // If the kernel argument is image*_t or sampler_t, convert it to // a i32 constant holding the parameter position. This can later // matched in the AsmPrinter to output the correct mangled name. - if (isImageOrSamplerVal(theArgs[i], - (theArgs[i]->getParent() ? - theArgs[i]->getParent()->getParent() : 0))) { + if (isImageOrSamplerVal( + theArgs[i], + (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent() + : 0))) { assert(isKernel && "Only kernels can have image/sampler params"); - InVals.push_back(DAG.getConstant(i+1, MVT::i32)); + InVals.push_back(DAG.getConstant(i + 1, MVT::i32)); continue; } if (theArgs[i]->use_empty()) { // argument is dead - InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT)); + if (ObjectVT.isVector()) { + EVT EltVT = ObjectVT.getVectorElementType(); + unsigned NumElts = ObjectVT.getVectorNumElements(); + for (unsigned vi = 0; vi < NumElts; ++vi) { + InVals.push_back(DAG.getNode(ISD::UNDEF, dl, EltVT)); + } + } else { + InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT)); + } continue; } @@ -1089,31 +1107,52 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, // to newly created nodes. 
The SDNOdes for params have to // appear in the same order as their order of appearance // in the original function. "idx+1" holds that order. - if (PAL.hasAttribute(i+1, Attribute::ByVal) == false) { + if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) { + if (ObjectVT.isVector()) { + unsigned NumElts = ObjectVT.getVectorNumElements(); + EVT EltVT = ObjectVT.getVectorElementType(); + unsigned Offset = 0; + for (unsigned vi = 0; vi < NumElts; ++vi) { + SDValue A = getParamSymbol(DAG, idx, getPointerTy()); + SDValue B = DAG.getIntPtrConstant(Offset); + SDValue Addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), + //getParamSymbol(DAG, idx, EltVT), + //DAG.getConstant(Offset, getPointerTy())); + A, B); + Value *SrcValue = Constant::getNullValue(PointerType::get( + EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); + SDValue Ld = DAG.getLoad( + EltVT, dl, Root, Addr, MachinePointerInfo(SrcValue), false, false, + false, + TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext()))); + Offset += EltVT.getStoreSizeInBits() / 8; + InVals.push_back(Ld); + } + continue; + } + // A plain scalar. if (isABI || isKernel) { // If ABI, load from the param symbol SDValue Arg = getParamSymbol(DAG, idx); // Conjure up a value that we can get the address space from. // FIXME: Using a constant here is a hack. 
- Value *srcValue = Constant::getNullValue(PointerType::get( - ObjectVT.getTypeForEVT(F->getContext()), - llvm::ADDRESS_SPACE_PARAM)); - SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg, - MachinePointerInfo(srcValue), false, false, - false, - TD->getABITypeAlignment(ObjectVT.getTypeForEVT( - F->getContext()))); + Value *srcValue = Constant::getNullValue( + PointerType::get(ObjectVT.getTypeForEVT(F->getContext()), + llvm::ADDRESS_SPACE_PARAM)); + SDValue p = DAG.getLoad( + ObjectVT, dl, Root, Arg, MachinePointerInfo(srcValue), false, false, + false, + TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); if (p.getNode()) - DAG.AssignOrdering(p.getNode(), idx+1); + DAG.AssignOrdering(p.getNode(), idx + 1); InVals.push_back(p); - } - else { + } else { // If no ABI, just move the param symbol SDValue Arg = getParamSymbol(DAG, idx, ObjectVT); SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); if (p.getNode()) - DAG.AssignOrdering(p.getNode(), idx+1); + DAG.AssignOrdering(p.getNode(), idx + 1); InVals.push_back(p); } continue; @@ -1130,47 +1169,49 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); if (p.getNode()) - DAG.AssignOrdering(p.getNode(), idx+1); + DAG.AssignOrdering(p.getNode(), idx + 1); if (isKernel) InVals.push_back(p); else { - SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, - DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), - p); + SDValue p2 = DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, + DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p); InVals.push_back(p2); } } else { // Have to move a set of param symbols to registers and // store them locally and return the local pointer in InVals const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]); - assert(elemPtrType && - "Byval parameter should be a pointer type"); + assert(elemPtrType && 
"Byval parameter should be a pointer type"); Type *elemType = elemPtrType->getElementType(); // Compute the constituent parts SmallVector<EVT, 16> vtparts; SmallVector<uint64_t, 16> offsets; ComputeValueVTs(*this, elemType, vtparts, &offsets, 0); unsigned totalsize = 0; - for (unsigned j=0, je=vtparts.size(); j!=je; ++j) + for (unsigned j = 0, je = vtparts.size(); j != je; ++j) totalsize += vtparts[j].getStoreSizeInBits(); - SDValue localcopy = DAG.getFrameIndex(MF.getFrameInfo()-> - CreateStackObject(totalsize/8, 16, false), - getPointerTy()); + SDValue localcopy = DAG.getFrameIndex( + MF.getFrameInfo()->CreateStackObject(totalsize / 8, 16, false), + getPointerTy()); unsigned sizesofar = 0; std::vector<SDValue> theChains; - for (unsigned j=0, je=vtparts.size(); j!=je; ++j) { + for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { unsigned numElems = 1; - if (vtparts[j].isVector()) numElems = vtparts[j].getVectorNumElements(); - for (unsigned k=0, ke=numElems; k!=ke; ++k) { + if (vtparts[j].isVector()) + numElems = vtparts[j].getVectorNumElements(); + for (unsigned k = 0, ke = numElems; k != ke; ++k) { EVT tmpvt = vtparts[j]; - if (tmpvt.isVector()) tmpvt = tmpvt.getVectorElementType(); + if (tmpvt.isVector()) + tmpvt = tmpvt.getVectorElementType(); SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt, getParamSymbol(DAG, idx, tmpvt)); - SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy, - DAG.getConstant(sizesofar, getPointerTy())); - theChains.push_back(DAG.getStore(Chain, dl, arg, addr, - MachinePointerInfo(), false, false, 0)); - sizesofar += tmpvt.getStoreSizeInBits()/8; + SDValue addr = + DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy, + DAG.getConstant(sizesofar, getPointerTy())); + theChains.push_back(DAG.getStore( + Chain, dl, arg, addr, MachinePointerInfo(), false, false, 0)); + sizesofar += tmpvt.getStoreSizeInBits() / 8; ++idx; } } @@ -1190,43 +1231,42 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, //} if 
(!OutChains.empty()) - DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &OutChains[0], OutChains.size())); + DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0], + OutChains.size())); return Chain; } -SDValue -NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - DebugLoc dl, SelectionDAG &DAG) const { +SDValue NVPTXTargetLowering::LowerReturn( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, + SelectionDAG &DAG) const { bool isABI = (nvptxSubtarget.getSmVersion() >= 20); unsigned sizesofar = 0; unsigned idx = 0; - for (unsigned i=0, e=Outs.size(); i!=e; ++i) { + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { SDValue theVal = OutVals[i]; EVT theValType = theVal.getValueType(); unsigned numElems = 1; - if (theValType.isVector()) numElems = theValType.getVectorNumElements(); - for (unsigned j=0,je=numElems; j!=je; ++j) { + if (theValType.isVector()) + numElems = theValType.getVectorNumElements(); + for (unsigned j = 0, je = numElems; j != je; ++j) { SDValue tmpval = theVal; if (theValType.isVector()) tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - theValType.getVectorElementType(), - tmpval, DAG.getIntPtrConstant(j)); - Chain = DAG.getNode(isABI ? NVPTXISD::StoreRetval :NVPTXISD::MoveToRetval, - dl, MVT::Other, - Chain, - DAG.getConstant(isABI ? sizesofar : idx, MVT::i32), + theValType.getVectorElementType(), tmpval, + DAG.getIntPtrConstant(j)); + Chain = DAG.getNode( + isABI ? NVPTXISD::StoreRetval : NVPTXISD::MoveToRetval, dl, + MVT::Other, Chain, DAG.getConstant(isABI ? 
sizesofar : idx, MVT::i32), tmpval); if (theValType.isVector()) - sizesofar += theValType.getVectorElementType().getStoreSizeInBits()/8; + sizesofar += theValType.getVectorElementType().getStoreSizeInBits() / 8; else - sizesofar += theValType.getStoreSizeInBits()/8; + sizesofar += theValType.getStoreSizeInBits() / 8; ++idx; } } @@ -1234,12 +1274,9 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain); } -void -NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op, - std::string &Constraint, - std::vector<SDValue> &Ops, - SelectionDAG &DAG) const -{ +void NVPTXTargetLowering::LowerAsmOperandForConstraint( + SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, + SelectionDAG &DAG) const { if (Constraint.length() > 1) return; else @@ -1249,8 +1286,7 @@ NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op, // NVPTX suuport vector of legal types of any length in Intrinsics because the // NVPTX specific type legalizer // will legalize them to the PTX supported length. -bool -NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { +bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { if (isTypeLegal(VT)) return true; if (VT.isVector()) { @@ -1261,15 +1297,13 @@ NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { return false; } - // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as // TgtMemIntrinsic // because we need the information that is only available in the "Value" type // of destination // pointer. In particular, the address space information. 
-bool -NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I, - unsigned Intrinsic) const { +bool NVPTXTargetLowering::getTgtMemIntrinsic( + IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { switch (Intrinsic) { default: return false; @@ -1325,9 +1359,8 @@ NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I, /// Used to guide target specific optimizations, like loop strength reduction /// (LoopStrengthReduce.cpp) and memory optimization for address mode /// (CodeGenPrepare.cpp) -bool -NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { +bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, + Type *Ty) const { // AddrMode - This represents an addressing mode of: // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg @@ -1345,10 +1378,10 @@ NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, } switch (AM.Scale) { - case 0: // "r", "r+i" or "i" is allowed + case 0: // "r", "r+i" or "i" is allowed break; case 1: - if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed. + if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed. return false; // Otherwise we have r+i. break; @@ -1385,8 +1418,7 @@ NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const { return TargetLowering::getConstraintType(Constraint); } - -std::pair<unsigned, const TargetRegisterClass*> +std::pair<unsigned, const TargetRegisterClass *> NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { if (Constraint.size() == 1) { @@ -1409,8 +1441,6 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } - - /// getFunctionAlignment - Return the Log2 alignment of this function. 
unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { return 4; @@ -1418,7 +1448,7 @@ unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, - SmallVectorImpl<SDValue>& Results) { + SmallVectorImpl<SDValue> &Results) { EVT ResVT = N->getValueType(0); DebugLoc DL = N->getDebugLoc(); @@ -1429,7 +1459,8 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, // but I'm leaving that as a TODO for now. assert(ResVT.isSimple() && "Can only handle simple types"); switch (ResVT.getSimpleVT().SimpleTy) { - default: return; + default: + return; case MVT::v2i8: case MVT::v2i16: case MVT::v2i32: @@ -1460,7 +1491,8 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, SDVTList LdResVTs; switch (NumElts) { - default: return; + default: + return; case 2: Opcode = NVPTXISD::LoadV2; LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); @@ -1500,14 +1532,14 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, SDValue LoadChain = NewLD.getValue(NumElts); - SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); + SDValue BuildVec = + DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); Results.push_back(BuildVec); Results.push_back(LoadChain); } -static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, - SelectionDAG &DAG, +static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, SmallVectorImpl<SDValue> &Results) { SDValue Chain = N->getOperand(0); SDValue Intrin = N->getOperand(1); @@ -1515,8 +1547,9 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, // Get the intrinsic ID unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue(); - switch(IntrinNo) { - default: return; + switch (IntrinNo) { + default: + return; case Intrinsic::nvvm_ldg_global_i: case Intrinsic::nvvm_ldg_global_f: case Intrinsic::nvvm_ldg_global_p: @@ 
-1544,10 +1577,12 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SDVTList LdResVTs; switch (NumElts) { - default: return; + default: + return; case 2: - switch(IntrinNo) { - default: return; + switch (IntrinNo) { + default: + return; case Intrinsic::nvvm_ldg_global_i: case Intrinsic::nvvm_ldg_global_f: case Intrinsic::nvvm_ldg_global_p: @@ -1562,8 +1597,9 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); break; case 4: { - switch(IntrinNo) { - default: return; + switch (IntrinNo) { + default: + return; case Intrinsic::nvvm_ldg_global_i: case Intrinsic::nvvm_ldg_global_f: case Intrinsic::nvvm_ldg_global_p: @@ -1586,29 +1622,31 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, // Copy regular operands OtherOps.push_back(Chain); // Chain - // Skip operand 1 (intrinsic ID) - // Others + // Skip operand 1 (intrinsic ID) + // Others for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) OtherOps.push_back(N->getOperand(i)); MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); - SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0], - OtherOps.size(), MemSD->getMemoryVT(), - MemSD->getMemOperand()); + SDValue NewLD = DAG.getMemIntrinsicNode( + Opcode, DL, LdResVTs, &OtherOps[0], OtherOps.size(), + MemSD->getMemoryVT(), MemSD->getMemOperand()); SmallVector<SDValue, 4> ScalarRes; for (unsigned i = 0; i < NumElts; ++i) { SDValue Res = NewLD.getValue(i); if (NeedTrunc) - Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); + Res = + DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); ScalarRes.push_back(Res); } SDValue LoadChain = NewLD.getValue(NumElts); - SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); + SDValue BuildVec = + DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); Results.push_back(BuildVec); Results.push_back(LoadChain); @@ -1629,10 +1667,9 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode 
*N, // We make sure the memory type is i8, which will be used during isel // to select the proper instruction. - SDValue NewLD = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, - LdResVTs, &Ops[0], - Ops.size(), MVT::i8, - MemSD->getMemOperand()); + SDValue NewLD = + DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, &Ops[0], + Ops.size(), MVT::i8, MemSD->getMemOperand()); Results.push_back(NewLD.getValue(0)); Results.push_back(NewLD.getValue(1)); @@ -1641,11 +1678,11 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, } } -void NVPTXTargetLowering::ReplaceNodeResults(SDNode *N, - SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) const { +void NVPTXTargetLowering::ReplaceNodeResults( + SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { switch (N->getOpcode()) { - default: report_fatal_error("Unhandled custom legalization"); + default: + report_fatal_error("Unhandled custom legalization"); case ISD::LOAD: ReplaceLoadVector(N, DAG, Results); return; diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 14afc148cb..3cd49d38af 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -87,7 +87,7 @@ public: bool isTypeSupportedInIntrinsic(MVT VT) const; - bool getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const; /// isLegalAddressingMode - Return true if the addressing mode represented @@ -107,14 +107,13 @@ public: } ConstraintType getConstraintType(const std::string &Constraint) const; - std::pair<unsigned, const TargetRegisterClass*> + std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - virtual SDValue - LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, - SelectionDAG &DAG, - 
SmallVectorImpl<SDValue> &InVals) const; + virtual SDValue LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const; @@ -136,17 +135,15 @@ public: NVPTXTargetMachine *nvTM; // PTX always uses 32-bit shift amounts - virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { - return MVT::i32; - } + virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } virtual bool shouldSplitVectorElementType(EVT VT) const; private: - const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here + const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here - SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, EVT = - MVT::i32) const; + SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, + EVT = MVT::i32) const; SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT = MVT::i32) const; SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx); @@ -159,8 +156,7 @@ private: SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const; - virtual void ReplaceNodeResults(SDNode *N, - SmallVectorImpl<SDValue> &Results, + virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const; }; } // namespace llvm diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 9e73d80c28..33a63c26f4 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -23,61 +23,55 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include <cstdio> - using namespace llvm; // FIXME: Add the subtarget support on this constructor. 
NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm) -: NVPTXGenInstrInfo(), - TM(tm), - RegInfo(*this, *TM.getSubtargetImpl()) {} - + : NVPTXGenInstrInfo(), TM(tm), RegInfo(*this, *TM.getSubtargetImpl()) {} -void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { +void NVPTXInstrInfo::copyPhysReg( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, bool KillSrc) const { if (NVPTX::Int32RegsRegClass.contains(DestReg) && NVPTX::Int32RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Int8RegsRegClass.contains(DestReg) && - NVPTX::Int8RegsRegClass.contains(SrcReg)) + NVPTX::Int8RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Int1RegsRegClass.contains(DestReg) && - NVPTX::Int1RegsRegClass.contains(SrcReg)) + NVPTX::Int1RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Float32RegsRegClass.contains(DestReg) && - NVPTX::Float32RegsRegClass.contains(SrcReg)) + NVPTX::Float32RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Int16RegsRegClass.contains(DestReg) && - NVPTX::Int16RegsRegClass.contains(SrcReg)) + NVPTX::Int16RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Int64RegsRegClass.contains(DestReg) && - 
NVPTX::Int64RegsRegClass.contains(SrcReg)) + NVPTX::Int64RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Float64RegsRegClass.contains(DestReg) && - NVPTX::Float64RegsRegClass.contains(SrcReg)) + NVPTX::Float64RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else { llvm_unreachable("Don't know how to copy a register"); } } -bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, +bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DestReg) const { // Look for the appropriate part of TSFlags bool isMove = false; - unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >> - NVPTX::SimpleMoveShift; + unsigned TSFlags = + (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >> NVPTX::SimpleMoveShift; isMove = (TSFlags == 1); if (isMove) { @@ -94,10 +88,10 @@ bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, return false; } -bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const -{ +bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const { switch (MI.getOpcode()) { - default: return false; + default: + return false; case NVPTX::INT_PTX_SREG_NTID_X: case NVPTX::INT_PTX_SREG_NTID_Y: case NVPTX::INT_PTX_SREG_NTID_Z: @@ -115,12 +109,11 @@ bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const } } - bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const { bool isLoad = false; - unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isLoadMask) >> - NVPTX::isLoadShift; + unsigned TSFlags = + (MI.getDesc().TSFlags & NVPTX::isLoadMask) >> NVPTX::isLoadShift; isLoad = (TSFlags == 1); if (isLoad) AddrSpace = getLdStCodeAddrSpace(MI); @@ -130,15 +123,14 @@ bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI, 
bool NVPTXInstrInfo::isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const { bool isStore = false; - unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isStoreMask) >> - NVPTX::isStoreShift; + unsigned TSFlags = + (MI.getDesc().TSFlags & NVPTX::isStoreMask) >> NVPTX::isStoreShift; isStore = (TSFlags == 1); if (isStore) AddrSpace = getLdStCodeAddrSpace(MI); return isStore; } - bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const { unsigned addrspace = 0; if (MI->getOpcode() == NVPTX::INT_CUDA_SYNCTHREADS) @@ -152,7 +144,6 @@ bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const { return true; } - /// AnalyzeBranch - Analyze the branching code at the end of MBB, returning /// true if it cannot be understood (e.g. it's a switch dispatch or isn't /// implemented for a target). Upon success, this returns false and returns @@ -176,11 +167,9 @@ bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const { /// Note that RemoveBranch and InsertBranch must be implemented to support /// cases where this method returns success. /// -bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, - MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const { +bool NVPTXInstrInfo::AnalyzeBranch( + MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) @@ -208,14 +197,13 @@ bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineInstr *SecondLastInst = I; // If there are three terminators, we don't know what sort of block this is. 
- if (SecondLastInst && I != MBB.begin() && - isUnpredicatedTerminator(--I)) + if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) return true; // If the block ends with NVPTX::GOTO and NVPTX:CBranch, handle it. if (SecondLastInst->getOpcode() == NVPTX::CBranch && LastInst->getOpcode() == NVPTX::GOTO) { - TBB = SecondLastInst->getOperand(1).getMBB(); + TBB = SecondLastInst->getOperand(1).getMBB(); Cond.push_back(SecondLastInst->getOperand(0)); FBB = LastInst->getOperand(0).getMBB(); return false; @@ -238,7 +226,8 @@ bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) return 0; + if (I == MBB.begin()) + return 0; --I; if (I->getOpcode() != NVPTX::GOTO && I->getOpcode() != NVPTX::CBranch) return 0; @@ -248,7 +237,8 @@ unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { I = MBB.end(); - if (I == MBB.begin()) return 1; + if (I == MBB.begin()) + return 1; --I; if (I->getOpcode() != NVPTX::CBranch) return 1; @@ -258,11 +248,9 @@ unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return 2; } -unsigned -NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond, - DebugLoc DL) const { +unsigned NVPTXInstrInfo::InsertBranch( + MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 1 || Cond.size() == 0) && @@ -270,17 +258,16 @@ NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, // One-way branch. 
if (FBB == 0) { - if (Cond.empty()) // Unconditional branch + if (Cond.empty()) // Unconditional branch BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB); - else // Conditional branch - BuildMI(&MBB, DL, get(NVPTX::CBranch)) - .addReg(Cond[0].getReg()).addMBB(TBB); + else // Conditional branch + BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg()) + .addMBB(TBB); return 1; } // Two-way Conditional Branch. - BuildMI(&MBB, DL, get(NVPTX::CBranch)) - .addReg(Cond[0].getReg()).addMBB(TBB); + BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg()).addMBB(TBB); BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB); return 2; } diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h index 7b8e218b05..b1972e9b72 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.h +++ b/lib/Target/NVPTX/NVPTXInstrInfo.h @@ -23,8 +23,7 @@ namespace llvm { -class NVPTXInstrInfo : public NVPTXGenInstrInfo -{ +class NVPTXInstrInfo : public NVPTXGenInstrInfo { NVPTXTargetMachine &TM; const NVPTXRegisterInfo RegInfo; public: @@ -50,30 +49,26 @@ public: * const TargetRegisterClass *RC) const; */ - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const ; - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, + virtual void copyPhysReg( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, bool KillSrc) const; + virtual bool isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DestReg) const; bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const; bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const; bool isReadSpecialReg(MachineInstr &MI) const; - virtual bool CanTailMerge(const MachineInstr *MI) const ; + virtual bool CanTailMerge(const MachineInstr *MI) const; // Branch analysis. 
- virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const; + virtual bool AnalyzeBranch( + MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const; virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; - virtual unsigned InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond, - DebugLoc DL) const; + virtual unsigned InsertBranch( + MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const; unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const { - return MI.getOperand(2).getImm(); + return MI.getOperand(2).getImm(); } }; diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index f7fa7aa61d..7c257b4c6a 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -25,18 +25,15 @@ using namespace llvm; -namespace llvm { -FunctionPass *createLowerAggrCopies(); -} +namespace llvm { FunctionPass *createLowerAggrCopies(); } char NVPTXLowerAggrCopies::ID = 0; // Lower MemTransferInst or load-store pair to loop -static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr, - Value *dstAddr, Value *len, - //unsigned numLoads, - bool srcVolatile, bool dstVolatile, - LLVMContext &Context, Function &F) { +static void convertTransferToLoop( + Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len, + //unsigned numLoads, + bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) { Type *indType = len->getType(); BasicBlock *origBB = splitAt->getParent(); @@ -48,10 +45,8 @@ static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr, // srcAddr and dstAddr are expected to be pointer 
types, // so no check is made here. - unsigned srcAS = - dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace(); - unsigned dstAS = - dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); + unsigned srcAS = dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace(); + unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); // Cast pointers to (char *) srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS)); @@ -86,12 +81,11 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr, origBB->getTerminator()->setSuccessor(0, loopBB); IRBuilder<> builder(origBB, origBB->getTerminator()); - unsigned dstAS = - dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); + unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); // Cast pointer to the type of value getting stored - dstAddr = builder.CreateBitCast(dstAddr, - PointerType::get(val->getType(), dstAS)); + dstAddr = + builder.CreateBitCast(dstAddr, PointerType::get(val->getType(), dstAS)); IRBuilder<> loop(loopBB); PHINode *ind = loop.CreatePHI(len->getType(), 0); @@ -120,24 +114,26 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { //BasicBlock *bb = BI; for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; - ++II) { - if (LoadInst * load = dyn_cast<LoadInst>(II)) { + ++II) { + if (LoadInst *load = dyn_cast<LoadInst>(II)) { - if (load->hasOneUse() == false) continue; + if (load->hasOneUse() == false) + continue; - if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize) continue; + if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize) + continue; User *use = *(load->use_begin()); - if (StoreInst * store = dyn_cast<StoreInst>(use)) { + if (StoreInst *store = dyn_cast<StoreInst>(use)) { if (store->getOperand(0) != load) //getValueOperand - continue; + continue; aggrLoads.push_back(load); } - } else if 
(MemTransferInst * intr = dyn_cast<MemTransferInst>(II)) { + } else if (MemTransferInst *intr = dyn_cast<MemTransferInst>(II)) { Value *len = intr->getLength(); // If the number of elements being copied is greater // than MaxAggrCopySize, lower it to a loop - if (ConstantInt * len_int = dyn_cast < ConstantInt > (len)) { + if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) { if (len_int->getZExtValue() >= MaxAggrCopySize) { aggrMemcpys.push_back(intr); } @@ -145,9 +141,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { // turn variable length memcpy/memmov into loop aggrMemcpys.push_back(intr); } - } else if (MemSetInst * memsetintr = dyn_cast<MemSetInst>(II)) { + } else if (MemSetInst *memsetintr = dyn_cast<MemSetInst>(II)) { Value *len = memsetintr->getLength(); - if (ConstantInt * len_int = dyn_cast<ConstantInt>(len)) { + if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) { if (len_int->getZExtValue() >= MaxAggrCopySize) { aggrMemsets.push_back(memsetintr); } @@ -158,8 +154,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { } } } - if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) - && (aggrMemsets.size() == 0)) return false; + if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) && + (aggrMemsets.size() == 0)) + return false; // // Do the transformation of an aggr load/copy/set to a loop diff --git a/lib/Target/NVPTX/NVPTXNumRegisters.h b/lib/Target/NVPTX/NVPTXNumRegisters.h index b4a4dbce98..a95c16b1e6 100644 --- a/lib/Target/NVPTX/NVPTXNumRegisters.h +++ b/lib/Target/NVPTX/NVPTXNumRegisters.h @@ -11,10 +11,6 @@ #ifndef NVPTX_NUM_REGISTERS_H #define NVPTX_NUM_REGISTERS_H -namespace llvm { - -const unsigned NVPTXNumRegisters = 396; - -} +namespace llvm { const unsigned NVPTXNumRegisters = 396; } #endif diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp index 350a2c5551..282465359b 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ 
-23,69 +23,54 @@ #include "llvm/MC/MachineLocation.h" #include "llvm/Target/TargetInstrInfo.h" - using namespace llvm; -namespace llvm -{ -std::string getNVPTXRegClassName (TargetRegisterClass const *RC) { +namespace llvm { +std::string getNVPTXRegClassName(TargetRegisterClass const *RC) { if (RC == &NVPTX::Float32RegsRegClass) { return ".f32"; } if (RC == &NVPTX::Float64RegsRegClass) { return ".f64"; - } - else if (RC == &NVPTX::Int64RegsRegClass) { + } else if (RC == &NVPTX::Int64RegsRegClass) { return ".s64"; - } - else if (RC == &NVPTX::Int32RegsRegClass) { + } else if (RC == &NVPTX::Int32RegsRegClass) { return ".s32"; - } - else if (RC == &NVPTX::Int16RegsRegClass) { + } else if (RC == &NVPTX::Int16RegsRegClass) { return ".s16"; } - // Int8Regs become 16-bit registers in PTX - else if (RC == &NVPTX::Int8RegsRegClass) { + // Int8Regs become 16-bit registers in PTX + else if (RC == &NVPTX::Int8RegsRegClass) { return ".s16"; - } - else if (RC == &NVPTX::Int1RegsRegClass) { + } else if (RC == &NVPTX::Int1RegsRegClass) { return ".pred"; - } - else if (RC == &NVPTX::SpecialRegsRegClass) { + } else if (RC == &NVPTX::SpecialRegsRegClass) { return "!Special!"; - } - else { + } else { return "INTERNAL"; } return ""; } -std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) { +std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) { if (RC == &NVPTX::Float32RegsRegClass) { return "%f"; } if (RC == &NVPTX::Float64RegsRegClass) { return "%fd"; - } - else if (RC == &NVPTX::Int64RegsRegClass) { + } else if (RC == &NVPTX::Int64RegsRegClass) { return "%rd"; - } - else if (RC == &NVPTX::Int32RegsRegClass) { + } else if (RC == &NVPTX::Int32RegsRegClass) { return "%r"; - } - else if (RC == &NVPTX::Int16RegsRegClass) { + } else if (RC == &NVPTX::Int16RegsRegClass) { return "%rs"; - } - else if (RC == &NVPTX::Int8RegsRegClass) { + } else if (RC == &NVPTX::Int8RegsRegClass) { return "%rc"; - } - else if (RC == &NVPTX::Int1RegsRegClass) { + } else if (RC == 
&NVPTX::Int1RegsRegClass) { return "%p"; - } - else if (RC == &NVPTX::SpecialRegsRegClass) { + } else if (RC == &NVPTX::SpecialRegsRegClass) { return "!Special!"; - } - else { + } else { return "INTERNAL"; } return ""; @@ -94,23 +79,22 @@ std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) { NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii, const NVPTXSubtarget &st) - : NVPTXGenRegisterInfo(0), - Is64Bit(st.is64Bit()) {} + : NVPTXGenRegisterInfo(0), Is64Bit(st.is64Bit()) {} #define GET_REGINFO_TARGET_DESC #include "NVPTXGenRegisterInfo.inc" /// NVPTX Callee Saved Registers -const uint16_t* NVPTXRegisterInfo:: -getCalleeSavedRegs(const MachineFunction *MF) const { +const uint16_t * +NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { static const uint16_t CalleeSavedRegs[] = { 0 }; return CalleeSavedRegs; } // NVPTX Callee Saved Reg Classes -const TargetRegisterClass* const* +const TargetRegisterClass *const * NVPTXRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 }; + static const TargetRegisterClass *const CalleeSavedRegClasses[] = { 0 }; return CalleeSavedRegClasses; } @@ -119,10 +103,9 @@ BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } -void NVPTXRegisterInfo:: -eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, unsigned FIOperandNum, - RegScavenger *RS) const { +void NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); MachineInstr &MI = *II; @@ -130,15 +113,14 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, MachineFunction &MF = *MI.getParent()->getParent(); int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + - MI.getOperand(FIOperandNum+1).getImm(); + MI.getOperand(FIOperandNum + 1).getImm(); // Using I0 as the frame 
pointer MI.getOperand(FIOperandNum).ChangeToRegister(NVPTX::VRFrame, false); - MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); } -int NVPTXRegisterInfo:: -getDwarfRegNum(unsigned RegNum, bool isEH) const { +int NVPTXRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { return 0; } @@ -146,7 +128,4 @@ unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return NVPTX::VRFrame; } -unsigned NVPTXRegisterInfo::getRARegister() const { - return 0; -} - +unsigned NVPTXRegisterInfo::getRARegister() const { return 0; } diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h index 69f73f213c..d406820661 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.h +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h @@ -17,7 +17,6 @@ #include "ManagedStringPool.h" #include "llvm/Target/TargetRegisterInfo.h" - #define GET_REGINFO_HEADER #include "NVPTXGenRegisterInfo.inc" #include "llvm/Target/TargetRegisterInfo.h" @@ -33,30 +32,28 @@ class NVPTXRegisterInfo : public NVPTXGenRegisterInfo { private: bool Is64Bit; // Hold Strings that can be free'd all together with NVPTXRegisterInfo - ManagedStringPool ManagedStrPool; + ManagedStringPool ManagedStrPool; public: - NVPTXRegisterInfo(const TargetInstrInfo &tii, - const NVPTXSubtarget &st); - + NVPTXRegisterInfo(const TargetInstrInfo &tii, const NVPTXSubtarget &st); //------------------------------------------------------ // Pure virtual functions from TargetRegisterInfo //------------------------------------------------------ // NVPTX callee saved registers - virtual const uint16_t* + virtual const uint16_t * getCalleeSavedRegs(const MachineFunction *MF = 0) const; // NVPTX callee saved register classes - virtual const TargetRegisterClass* const * + virtual const TargetRegisterClass *const * getCalleeSavedRegClasses(const MachineFunction *MF) const; virtual BitVector getReservedRegs(const MachineFunction &MF) 
const; - virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, - int SPAdj, unsigned FIOperandNum, - RegScavenger *RS=NULL) const; + virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, + unsigned FIOperandNum, + RegScavenger *RS = NULL) const; virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const; virtual unsigned getFrameRegister(const MachineFunction &MF) const; @@ -74,11 +71,9 @@ public: }; - -std::string getNVPTXRegClassName (const TargetRegisterClass *RC); -std::string getNVPTXRegClassStr (const TargetRegisterClass *RC); +std::string getNVPTXRegClassName(const TargetRegisterClass *RC); +std::string getNVPTXRegClassStr(const TargetRegisterClass *RC); } // end namespace llvm - #endif diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp index babe29500d..83dfe12089 100644 --- a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp +++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp @@ -21,9 +21,7 @@ using namespace llvm; -namespace llvm { -FunctionPass *createSplitBBatBarPass(); -} +namespace llvm { FunctionPass *createSplitBBatBarPass(); } char NVPTXSplitBBatBar::ID = 0; @@ -72,6 +70,4 @@ bool NVPTXSplitBBatBar::runOnFunction(Function &F) { // This interface will most likely not be necessary, because this pass will // not be invoked by the driver, but will be used as a prerequisite to // another pass. 
-FunctionPass *llvm::createSplitBBatBarPass() { - return new NVPTXSplitBBatBar(); -} +FunctionPass *llvm::createSplitBBatBarPass() { return new NVPTXSplitBBatBar(); } diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp index 7b62cce2c6..2dcd73dcff 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -22,27 +22,23 @@ using namespace llvm; // Select Driver Interface #include "llvm/Support/CommandLine.h" namespace { -cl::opt<NVPTX::DrvInterface> -DriverInterface(cl::desc("Choose driver interface:"), - cl::values( - clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"), - clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"), - clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"), - clEnumValEnd), - cl::init(NVPTX::NVCL)); +cl::opt<NVPTX::DrvInterface> DriverInterface( + cl::desc("Choose driver interface:"), + cl::values(clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"), + clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"), + clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"), clEnumValEnd), + cl::init(NVPTX::NVCL)); } NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool is64Bit) -: NVPTXGenSubtargetInfo(TT, CPU, FS), - Is64Bit(is64Bit), - PTXVersion(0), - SmVersion(10) { + : NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0), + SmVersion(20) { drvInterface = DriverInterface; // Provide the default CPU if none - std::string defCPU = "sm_10"; + std::string defCPU = "sm_20"; ParseSubtargetFeatures((CPU.empty() ? 
defCPU : CPU), FS); diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h index beea77e38d..670077daaa 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/lib/Target/NVPTX/NVPTXSubtarget.h @@ -25,7 +25,7 @@ namespace llvm { class NVPTXSubtarget : public NVPTXGenSubtargetInfo { - + std::string TargetName; NVPTX::DrvInterface drvInterface; bool Is64Bit; @@ -61,13 +61,10 @@ public: bool hasLDU() const { return SmVersion >= 20; } bool hasGenericLdSt() const { return SmVersion >= 20; } inline bool hasHWROT32() const { return false; } - inline bool hasSWROT32() const { - return true; - } - inline bool hasROT32() const { return hasHWROT32() || hasSWROT32() ; } + inline bool hasSWROT32() const { return true; } + inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); } inline bool hasROT64() const { return SmVersion >= 20; } - bool is64Bit() const { return Is64Bit; } unsigned int getSmVersion() const { return SmVersion; } @@ -96,4 +93,4 @@ public: } // End llvm namespace -#endif // NVPTXSUBTARGET_H +#endif // NVPTXSUBTARGET_H diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index cd765fa8cb..67ca6b58e5 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -45,9 +45,11 @@ #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Scalar.h" - using namespace llvm; +namespace llvm { +void initializeNVVMReflectPass(PassRegistry&); +} extern "C" void LLVMInitializeNVPTXTarget() { // Register the target. @@ -57,52 +59,42 @@ extern "C" void LLVMInitializeNVPTXTarget() { RegisterMCAsmInfo<NVPTXMCAsmInfo> A(TheNVPTXTarget32); RegisterMCAsmInfo<NVPTXMCAsmInfo> B(TheNVPTXTarget64); + // FIXME: This pass is really intended to be invoked during IR optimization, + // but it's very NVPTX-specific. 
+ initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); } -NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, - StringRef TT, - StringRef CPU, - StringRef FS, - const TargetOptions& Options, - Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL, - bool is64bit) -: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, is64bit), - DL(Subtarget.getDataLayout()), - InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit) -/*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ { -} - - +NVPTXTargetMachine::NVPTXTargetMachine( + const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64bit) + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()), + InstrInfo(*this), TLInfo(*this), TSInfo(*this), + FrameLowering( + *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {} void NVPTXTargetMachine32::anchor() {} -NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) -: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) { -} +NVPTXTargetMachine32::NVPTXTargetMachine32( + const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} void NVPTXTargetMachine64::anchor() {} -NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) -: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) { -} - +NVPTXTargetMachine64::NVPTXTargetMachine64( + const Target &T, StringRef 
TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} namespace llvm { class NVPTXPassConfig : public TargetPassConfig { public: NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} + : TargetPassConfig(TM, PM) {} NVPTXTargetMachine &getNVPTXTargetMachine() const { return getTM<NVPTXTargetMachine>(); @@ -126,6 +118,4 @@ bool NVPTXPassConfig::addInstSelector() { return false; } -bool NVPTXPassConfig::addPreRegAlloc() { - return false; -} +bool NVPTXPassConfig::addPreRegAlloc() { return false; } diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index 1a732be1ad..5fbcf735b4 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #ifndef NVPTX_TARGETMACHINE_H #define NVPTX_TARGETMACHINE_H @@ -31,42 +30,40 @@ namespace llvm { /// NVPTXTargetMachine /// class NVPTXTargetMachine : public LLVMTargetMachine { - NVPTXSubtarget Subtarget; - const DataLayout DL; // Calculates type size & alignment - NVPTXInstrInfo InstrInfo; - NVPTXTargetLowering TLInfo; - TargetSelectionDAGInfo TSInfo; + NVPTXSubtarget Subtarget; + const DataLayout DL; // Calculates type size & alignment + NVPTXInstrInfo InstrInfo; + NVPTXTargetLowering TLInfo; + TargetSelectionDAGInfo TSInfo; // NVPTX does not have any call stack frame, but need a NVPTX specific // FrameLowering class because TargetFrameLowering is abstract. 
- NVPTXFrameLowering FrameLowering; + NVPTXFrameLowering FrameLowering; // Hold Strings that can be free'd all together with NVPTXTargetMachine - ManagedStringPool ManagedStrPool; + ManagedStringPool ManagedStrPool; //bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level, // bool DisableVerify, MCContext *&OutCtx); public: - NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OP, - bool is64bit); + NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, + CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit); virtual const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; } - virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const DataLayout *getDataLayout() const { return &DL;} - virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget;} + virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; } + virtual const DataLayout *getDataLayout() const { return &DL; } + virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; } virtual const NVPTXRegisterInfo *getRegisterInfo() const { return &(InstrInfo.getRegisterInfo()); } virtual NVPTXTargetLowering *getTargetLowering() const { - return const_cast<NVPTXTargetLowering*>(&TLInfo); + return const_cast<NVPTXTargetLowering *>(&TLInfo); } virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const { @@ -79,22 +76,19 @@ public: //virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level); ManagedStringPool *getManagedStrPool() const { - return const_cast<ManagedStringPool*>(&ManagedStrPool); + return const_cast<ManagedStringPool *>(&ManagedStrPool); } virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); // Emission of machine code through JITCodeEmitter is not supported. 
- virtual bool addPassesToEmitMachineCode(PassManagerBase &, - JITCodeEmitter &, + virtual bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &, bool = true) { return true; } // Emission of machine code through MCJIT is not supported. - virtual bool addPassesToEmitMC(PassManagerBase &, - MCContext *&, - raw_ostream &, + virtual bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &, bool = true) { return true; } @@ -119,7 +113,6 @@ public: CodeGenOpt::Level OL); }; - } // end namespace llvm #endif diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h index b5698a2fc0..6ab0e08ad0 100644 --- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h +++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h @@ -46,45 +46,43 @@ public: } virtual void Initialize(MCContext &ctx, const TargetMachine &TM) { - TextSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getText()); - DataSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getDataRel()); - BSSSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getBSS()); - ReadOnlySection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getReadOnly()); + TextSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getText()); + DataSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getDataRel()); + BSSSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getBSS()); + ReadOnlySection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getReadOnly()); - StaticCtorSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - StaticDtorSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - LSDASection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - EHFrameSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfAbbrevSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfInfoSection = new NVPTXSection(MCSection::SV_ELF, - 
SectionKind::getMetadata()); - DwarfLineSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfFrameSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfPubTypesSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfDebugInlineSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfStrSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfLocSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfARangesSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfRangesSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfMacroInfoSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); + StaticCtorSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + StaticDtorSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + LSDASection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + EHFrameSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfAbbrevSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfInfoSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfLineSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfFrameSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfPubTypesSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfDebugInlineSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfStrSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfLocSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfARangesSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + 
DwarfRangesSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfMacroInfoSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); } virtual const MCSection *getSectionForConstant(SectionKind Kind) const { @@ -93,8 +91,7 @@ public: virtual const MCSection * getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, - const TargetMachine &TM) const { + Mangler *Mang, const TargetMachine &TM) const { return DataSection; } diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp index 1ccc9f7c02..6786eb0224 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -34,7 +34,6 @@ typedef std::map<const Module *, global_val_annot_t> per_module_annot_t; ManagedStatic<per_module_annot_t> annotationCache; - static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) { assert(md && "Invalid mdnode for annotation"); assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands"); @@ -46,7 +45,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) { assert(prop && "Annotation property not a string"); // value - ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i+1)); + ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i + 1)); assert(Val && "Value operand not a constant int"); std::string keyname = prop->getString().str(); @@ -120,9 +119,9 @@ bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, std::string prop, bool llvm::isTexture(const llvm::Value &val) { if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) { unsigned annot; - if (llvm::findOneNVVMAnnotation(gv, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE], - annot)) { + if (llvm::findOneNVVMAnnotation( + gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE], + annot)) { assert((annot == 1) && "Unexpected annotation on a texture symbol"); return true; } @@ -133,9 +132,9 @@ bool llvm::isTexture(const 
llvm::Value &val) { bool llvm::isSurface(const llvm::Value &val) { if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) { unsigned annot; - if (llvm::findOneNVVMAnnotation(gv, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE], - annot)) { + if (llvm::findOneNVVMAnnotation( + gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE], + annot)) { assert((annot == 1) && "Unexpected annotation on a surface symbol"); return true; } @@ -146,9 +145,9 @@ bool llvm::isSurface(const llvm::Value &val) { bool llvm::isSampler(const llvm::Value &val) { if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) { unsigned annot; - if (llvm::findOneNVVMAnnotation(gv, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER], - annot)) { + if (llvm::findOneNVVMAnnotation( + gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER], + annot)) { assert((annot == 1) && "Unexpected annotation on a sampler symbol"); return true; } @@ -156,9 +155,9 @@ bool llvm::isSampler(const llvm::Value &val) { if (const Argument *arg = dyn_cast<Argument>(&val)) { const Function *func = arg->getParent(); std::vector<unsigned> annot; - if (llvm::findAllNVVMAnnotation(func, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER], - annot)) { + if (llvm::findAllNVVMAnnotation( + func, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER], + annot)) { if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end()) return true; } @@ -171,8 +170,9 @@ bool llvm::isImageReadOnly(const llvm::Value &val) { const Function *func = arg->getParent(); std::vector<unsigned> annot; if (llvm::findAllNVVMAnnotation(func, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISREADONLY_IMAGE_PARAM], - annot)) { + llvm::PropertyAnnotationNames[ + llvm::PROPERTY_ISREADONLY_IMAGE_PARAM], + annot)) { if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end()) return true; } @@ -185,8 +185,9 @@ bool llvm::isImageWriteOnly(const llvm::Value &val) { const Function *func = arg->getParent(); 
std::vector<unsigned> annot; if (llvm::findAllNVVMAnnotation(func, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM], - annot)) { + llvm::PropertyAnnotationNames[ + llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM], + annot)) { if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end()) return true; } @@ -214,52 +215,44 @@ std::string llvm::getSamplerName(const llvm::Value &val) { } bool llvm::getMaxNTIDx(const Function &F, unsigned &x) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X], - x)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X], x)); } bool llvm::getMaxNTIDy(const Function &F, unsigned &y) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y], - y)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y], y)); } bool llvm::getMaxNTIDz(const Function &F, unsigned &z) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z], - z)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z], z)); } bool llvm::getReqNTIDx(const Function &F, unsigned &x) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X], - x)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X], x)); } bool llvm::getReqNTIDy(const Function &F, unsigned &y) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y], - y)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y], y)); } bool llvm::getReqNTIDz(const Function &F, unsigned &z) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z], - z)); + return (llvm::findOneNVVMAnnotation( + &F, 
llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z], z)); } bool llvm::getMinCTASm(const Function &F, unsigned &x) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM], - x)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM], x)); } bool llvm::isKernelFunction(const Function &F) { unsigned x = 0; - bool retval = llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], - x); + bool retval = llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], x); if (retval == false) { // There is no NVVM metadata, check the calling convention if (F.getCallingConv() == llvm::CallingConv::PTX_Kernel) @@ -267,20 +260,19 @@ bool llvm::isKernelFunction(const Function &F) { else return false; } - return (x==1); + return (x == 1); } bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) { std::vector<unsigned> Vs; - bool retval = llvm::findAllNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], - Vs); + bool retval = llvm::findAllNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], Vs); if (retval == false) return false; - for (int i=0, e=Vs.size(); i<e; i++) { + for (int i = 0, e = Vs.size(); i < e; i++) { unsigned v = Vs[i]; - if ( (v >> 16) == index ) { - align = v & 0xFFFF; + if ((v >> 16) == index) { + align = v & 0xFFFF; return true; } } @@ -289,16 +281,15 @@ bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) { bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) { if (MDNode *alignNode = I.getMetadata("callalign")) { - for (int i=0, n = alignNode->getNumOperands(); - i<n; i++) { + for (int i = 0, n = alignNode->getNumOperands(); i < n; i++) { if (const ConstantInt *CI = - dyn_cast<ConstantInt>(alignNode->getOperand(i))) { + dyn_cast<ConstantInt>(alignNode->getOperand(i))) { unsigned v = 
CI->getZExtValue(); - if ( (v>>16) == index ) { + if ((v >> 16) == index) { align = v & 0xFFFF; return true; } - if ( (v>>16) > index ) { + if ((v >> 16) > index) { return false; } } @@ -337,8 +328,8 @@ bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) { // consider several special intrinsics in striping pointer casts, and // provide an option to ignore GEP indicies for find out the base address only // which could be used in simple alias disambigurate. -const Value *llvm::skipPointerTransfer(const Value *V, - bool ignore_GEP_indices) { +const Value * +llvm::skipPointerTransfer(const Value *V, bool ignore_GEP_indices) { V = V->stripPointerCasts(); while (true) { if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) { @@ -360,8 +351,8 @@ const Value *llvm::skipPointerTransfer(const Value *V, // - ignore GEP indicies for find out the base address only, and // - tracking PHINode // which could be used in simple alias disambigurate. -const Value *llvm::skipPointerTransfer(const Value *V, - std::set<const Value *> &processed) { +const Value * +llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) { if (processed.find(V) != processed.end()) return NULL; processed.insert(V); @@ -406,7 +397,6 @@ const Value *llvm::skipPointerTransfer(const Value *V, return V; } - // The following are some useful utilities for debuggung BasicBlock *llvm::getParentBlock(Value *v) { diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h index 247e09b8bc..a208004297 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.h +++ b/lib/Target/NVPTX/NVPTXUtilities.h @@ -23,8 +23,7 @@ #include <string> #include <vector> -namespace llvm -{ +namespace llvm { #define NVCL_IMAGE2D_READONLY_FUNCNAME "__is_image2D_readonly" #define NVCL_IMAGE3D_READONLY_FUNCNAME "__is_image3D_readonly" @@ -64,8 +63,7 @@ bool isBarrierIntrinsic(llvm::Intrinsic::ID); /// to pass into type construction of CallInst ctors. 
This turns a null /// terminated list of pointers (or other value types) into a real live vector. /// -template<typename T> -inline std::vector<T> make_vector(T A, ...) { +template <typename T> inline std::vector<T> make_vector(T A, ...) { va_list Args; va_start(Args, A); std::vector<T> Result; @@ -78,8 +76,8 @@ inline std::vector<T> make_vector(T A, ...) { bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id); const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices); -const Value *skipPointerTransfer(const Value *V, - std::set<const Value *> &processed); +const Value * +skipPointerTransfer(const Value *V, std::set<const Value *> &processed); BasicBlock *getParentBlock(Value *v); Function *getParentFunction(Value *v); void dumpBlock(Value *v, char *blockName); diff --git a/lib/Target/NVPTX/NVPTXutil.cpp b/lib/Target/NVPTX/NVPTXutil.cpp index 6a0e5328f6..5f074b33a2 100644 --- a/lib/Target/NVPTX/NVPTXutil.cpp +++ b/lib/Target/NVPTX/NVPTXutil.cpp @@ -18,8 +18,7 @@ using namespace llvm; namespace llvm { -bool isParamLoad(const MachineInstr *MI) -{ +bool isParamLoad(const MachineInstr *MI) { if ((MI->getOpcode() != NVPTX::LD_i32_avar) && (MI->getOpcode() != NVPTX::LD_i64_avar)) return false; @@ -30,13 +29,11 @@ bool isParamLoad(const MachineInstr *MI) return true; } -#define DATA_MASK 0x7f -#define DIGIT_WIDTH 7 -#define MORE_BYTES 0x80 +#define DATA_MASK 0x7f +#define DIGIT_WIDTH 7 +#define MORE_BYTES 0x80 -static int encode_leb128(uint64_t val, int *nbytes, - char *space, int splen) -{ +static int encode_leb128(uint64_t val, int *nbytes, char *space, int splen) { char *a; char *end = space + splen; @@ -61,29 +58,30 @@ static int encode_leb128(uint64_t val, int *nbytes, #undef DIGIT_WIDTH #undef MORE_BYTES -uint64_t encode_leb128(const char *str) -{ - union { uint64_t x; char a[8]; } temp64; +uint64_t encode_leb128(const char *str) { + union { + uint64_t x; + char a[8]; + } temp64; temp64.x = 0; - for (unsigned i=0,e=strlen(str); i!=e; ++i) - 
temp64.a[i] = str[e-1-i]; + for (unsigned i = 0, e = strlen(str); i != e; ++i) + temp64.a[i] = str[e - 1 - i]; char encoded[16]; int nbytes; int retval = encode_leb128(temp64.x, &nbytes, encoded, 16); - (void)retval; - assert(retval == 0 && - "Encoding to leb128 failed"); + (void) retval; + assert(retval == 0 && "Encoding to leb128 failed"); assert(nbytes <= 8 && "Cannot support register names with leb128 encoding > 8 bytes"); temp64.x = 0; - for (int i=0; i<nbytes; ++i) + for (int i = 0; i < nbytes; ++i) temp64.a[i] = encoded[i]; return temp64.x; diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp new file mode 100644 index 0000000000..3bbd1a13da --- /dev/null +++ b/lib/Target/NVPTX/NVVMReflect.cpp @@ -0,0 +1,193 @@ +//===- NVVMReflect.cpp - NVVM Emulate conditional compilation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass replaces occurences of __nvvm_reflect("string") with an +// integer based on -nvvm-reflect-list string=<int> option given to this pass. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/StringMap.h" +#include "llvm/Pass.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_os_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include <map> +#include <sstream> +#include <string> +#include <vector> + +#define NVVM_REFLECT_FUNCTION "__nvvm_reflect" + +using namespace llvm; + +namespace llvm { void initializeNVVMReflectPass(PassRegistry &); } + +namespace { +class LLVM_LIBRARY_VISIBILITY NVVMReflect : public ModulePass { +private: + //std::map<std::string, int> VarMap; + StringMap<int> VarMap; + typedef std::map<std::string, int>::iterator VarMapIter; + Function *reflectFunction; + +public: + static char ID; + NVVMReflect() : ModulePass(ID) { + VarMap.clear(); + reflectFunction = 0; + } + + void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } + virtual bool runOnModule(Module &); + + void setVarMap(); +}; +} + +static cl::opt<bool> +NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), + cl::desc("NVVM reflection, enabled by default")); + +char NVVMReflect::ID = 0; +INITIALIZE_PASS(NVVMReflect, "nvvm-reflect", + "Replace occurences of __nvvm_reflect() calls with 0/1", false, + false) + +static cl::list<std::string> +ReflectList("nvvm-reflect-list", cl::value_desc("name=0/1"), + cl::desc("A list of string=num assignments, where num=0 or 1"), + cl::ValueRequired); + +/// This function does the same operation as perl's split. +/// For example, calling this with ("a=1,b=2,c=0", ",") will +/// return ["a=1", "b=2", "c=0"] in the return std::vector. 
+static std::vector<std::string> +Tokenize(const std::string &str, const std::string &delim) { + std::vector<std::string> tokens; + + size_t p0 = 0, p1 = std::string::npos; + while (p0 != std::string::npos) { + p1 = str.find_first_of(delim, p0); + if (p1 != p0) { + std::string token = str.substr(p0, p1 - p0); + tokens.push_back(token); + } + p0 = str.find_first_not_of(delim, p1); + } + + return tokens; +} + +/// The command line can look as follows : +/// -R a=1,b=2 -R c=3,d=0 -R e=2 +/// The strings "a=1,b=2", "c=3,d=0", "e=2" are available in the +/// ReflectList vector. First, each of ReflectList[i] is 'split' +/// using "," as the delimiter. Then each of this part is split +/// using "=" as the delimiter. +void NVVMReflect::setVarMap() { + for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) { + // DEBUG(dbgs() << "Option : " << ReflectList[i] << std::endl); + std::vector<std::string> nameValList = Tokenize(ReflectList[i], ","); + for (unsigned j = 0, ej = nameValList.size(); j != ej; ++j) { + std::vector<std::string> nameValPair = Tokenize(nameValList[j], "="); + assert(nameValPair.size() == 2 && "name=val expected"); + std::stringstream valstream(nameValPair[1]); + int val; + valstream >> val; + assert((!(valstream.fail())) && "integer value expected"); + VarMap[nameValPair[0]] = val; + } + } +} + +bool NVVMReflect::runOnModule(Module &M) { + if (!NVVMReflectEnabled) + return false; + + setVarMap(); + + reflectFunction = M.getFunction(NVVM_REFLECT_FUNCTION); + + // If reflect function is not used, then there will be + // no entry in the module. + if (reflectFunction == 0) { + return false; + } + + // Validate _reflect function + assert(reflectFunction->isDeclaration() && + "_reflect function should not have a body"); + assert(reflectFunction->getReturnType()->isIntegerTy() && + "_reflect's return type should be integer"); + + std::vector<Instruction *> toRemove; + + // Go through the uses of reflectFunction in this Function. 
+ // Each of them should a CallInst with a ConstantArray argument. + // First validate that. If the c-string corresponding to the + // ConstantArray can be found successfully, see if it can be + // found in VarMap. If so, replace the uses of CallInst with the + // value found in VarMap. If not, replace the use with value 0. + for (Value::use_iterator iter = reflectFunction->use_begin(), + iterEnd = reflectFunction->use_end(); + iter != iterEnd; ++iter) { + assert(isa<CallInst>(*iter) && "Only a call instruction can use _reflect"); + CallInst *reflect = cast<CallInst>(*iter); + + assert((reflect->getNumOperands() == 2) && + "Only one operand expect for _reflect function"); + // In cuda, we will have an extra constant-to-generic conversion of + // the string. + const Value *conv = reflect->getArgOperand(0); + assert(isa<CallInst>(conv) && "Expected a const-to-gen conversion"); + const CallInst *convcall = cast<CallInst>(conv); + const Value *str = convcall->getArgOperand(0); + assert(isa<ConstantExpr>(str) && + "Format of _reflect function not recognized"); + const ConstantExpr *gep = cast<ConstantExpr>(str); + + const Value *sym = gep->getOperand(0); + assert(isa<Constant>(sym) && "Format of _reflect function not recognized"); + + const Constant *symstr = cast<Constant>(sym); + + assert(isa<ConstantDataSequential>(symstr->getOperand(0)) && + "Format of _reflect function not recognized"); + + assert(cast<ConstantDataSequential>(symstr->getOperand(0))->isCString() && + "Format of _reflect function not recognized"); + + std::string reflectArg = + cast<ConstantDataSequential>(symstr->getOperand(0))->getAsString(); + + reflectArg = reflectArg.substr(0, reflectArg.size() - 1); + // DEBUG(dbgs() << "Arg of _reflect : " << reflectArg << std::endl); + + int reflectVal = 0; // The default value is 0 + if (VarMap.find(reflectArg) != VarMap.end()) { + reflectVal = VarMap[reflectArg]; + } + reflect->replaceAllUsesWith( + ConstantInt::get(reflect->getType(), reflectVal)); + 
toRemove.push_back(reflect); + } + if (toRemove.size() == 0) + return false; + + for (unsigned i = 0, e = toRemove.size(); i != e; ++i) + toRemove[i]->eraseFromParent(); + return true; +} diff --git a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp index 6c801b875e..cc7d4dc5ec 100644 --- a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp +++ b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp @@ -17,7 +17,7 @@ Target llvm::TheNVPTXTarget64; extern "C" void LLVMInitializeNVPTXTargetInfo() { RegisterTarget<Triple::nvptx> X(TheNVPTXTarget32, "nvptx", - "NVIDIA PTX 32-bit"); + "NVIDIA PTX 32-bit"); RegisterTarget<Triple::nvptx64> Y(TheNVPTXTarget64, "nvptx64", - "NVIDIA PTX 64-bit"); + "NVIDIA PTX 64-bit"); } diff --git a/lib/Target/NVPTX/cl_common_defines.h b/lib/Target/NVPTX/cl_common_defines.h index a7347efd78..45cc0b8b67 100644 --- a/lib/Target/NVPTX/cl_common_defines.h +++ b/lib/Target/NVPTX/cl_common_defines.h @@ -24,22 +24,21 @@ enum { CLK_LUMINANCE = 0x10B9 #if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1) - , + , CLK_Rx = 0x10BA, CLK_RGx = 0x10BB, CLK_RGBx = 0x10BC #endif }; - typedef enum clk_channel_type { // valid formats for float return types - CLK_SNORM_INT8 = 0x10D0, // four channel RGBA unorm8 - CLK_SNORM_INT16 = 0x10D1, // four channel RGBA unorm16 - CLK_UNORM_INT8 = 0x10D2, // four channel RGBA unorm8 - CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16 - CLK_HALF_FLOAT = 0x10DD, // four channel RGBA half - CLK_FLOAT = 0x10DE, // four channel RGBA float + CLK_SNORM_INT8 = 0x10D0, // four channel RGBA unorm8 + CLK_SNORM_INT16 = 0x10D1, // four channel RGBA unorm16 + CLK_UNORM_INT8 = 0x10D2, // four channel RGBA unorm8 + CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16 + CLK_HALF_FLOAT = 0x10DD, // four channel RGBA half + CLK_FLOAT = 0x10DE, // four channel RGBA float #if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1) CLK_UNORM_SHORT_565 = 0x10D4, @@ -48,7 +47,7 @@ typedef enum clk_channel_type { 
#endif // valid only for integer return types - CLK_SIGNED_INT8 = 0x10D7, + CLK_SIGNED_INT8 = 0x10D7, CLK_SIGNED_INT16 = 0x10D8, CLK_SIGNED_INT32 = 0x10D9, CLK_UNSIGNED_INT8 = 0x10DA, @@ -56,70 +55,68 @@ typedef enum clk_channel_type { CLK_UNSIGNED_INT32 = 0x10DC, // CI SPI for CPU - __CLK_UNORM_INT8888 , // four channel ARGB unorm8 - __CLK_UNORM_INT8888R, // four channel BGRA unorm8 + __CLK_UNORM_INT8888, // four channel ARGB unorm8 + __CLK_UNORM_INT8888R, // four channel BGRA unorm8 __CLK_VALID_IMAGE_TYPE_COUNT, __CLK_INVALID_IMAGE_TYPE = __CLK_VALID_IMAGE_TYPE_COUNT, - __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to - // represent any image type - __CLK_VALID_IMAGE_TYPE_MASK = ( 1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS ) - 1 -}clk_channel_type; + __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to + // represent any image type + __CLK_VALID_IMAGE_TYPE_MASK = (1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS) - 1 +} clk_channel_type; typedef enum clk_sampler_type { - __CLK_ADDRESS_BASE = 0, - CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE, - CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE, - CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE, - CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE, - CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE, + __CLK_ADDRESS_BASE = 0, + CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE, + CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE, + CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE, + CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE, + CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE, #if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1) - CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR, + CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR, #endif - __CLK_ADDRESS_MASK = CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP | - CLK_ADDRESS_CLAMP_TO_EDGE | - CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR, - __CLK_ADDRESS_BITS = 3, // number of bits required to - // represent address info - - __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS, - 
CLK_NORMALIZED_COORDS_FALSE = 0, - CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE, - __CLK_NORMALIZED_MASK = CLK_NORMALIZED_COORDS_FALSE | - CLK_NORMALIZED_COORDS_TRUE, - __CLK_NORMALIZED_BITS = 1, // number of bits required to - // represent normalization - - __CLK_FILTER_BASE = __CLK_NORMALIZED_BASE + - __CLK_NORMALIZED_BITS, - CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE, - CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE, - CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE, - __CLK_FILTER_MASK = CLK_FILTER_NEAREST | CLK_FILTER_LINEAR | - CLK_FILTER_ANISOTROPIC, - __CLK_FILTER_BITS = 2, // number of bits required to - // represent address info - - __CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS, - CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE, - CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE, - CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE, - __CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR | - CLK_MIP_ANISOTROPIC, - __CLK_MIP_BITS = 2, - - __CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS, - __CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK | - __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK, - - __CLK_ANISOTROPIC_RATIO_BITS = 5, - __CLK_ANISOTROPIC_RATIO_MASK = (int) 0x80000000 >> - (__CLK_ANISOTROPIC_RATIO_BITS-1) + __CLK_ADDRESS_MASK = + CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP | CLK_ADDRESS_CLAMP_TO_EDGE | + CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR, + __CLK_ADDRESS_BITS = 3, // number of bits required to + // represent address info + + __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS, + CLK_NORMALIZED_COORDS_FALSE = 0, + CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE, + __CLK_NORMALIZED_MASK = + CLK_NORMALIZED_COORDS_FALSE | CLK_NORMALIZED_COORDS_TRUE, + __CLK_NORMALIZED_BITS = 1, // number of bits required to + // represent normalization + + __CLK_FILTER_BASE = __CLK_NORMALIZED_BASE + __CLK_NORMALIZED_BITS, + CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE, + CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE, + CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE, + __CLK_FILTER_MASK 
= + CLK_FILTER_NEAREST | CLK_FILTER_LINEAR | CLK_FILTER_ANISOTROPIC, + __CLK_FILTER_BITS = 2, // number of bits required to + // represent address info + + __CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS, + CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE, + CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE, + CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE, + __CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR | CLK_MIP_ANISOTROPIC, + __CLK_MIP_BITS = 2, + + __CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS, + __CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK | + __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK, + + __CLK_ANISOTROPIC_RATIO_BITS = 5, + __CLK_ANISOTROPIC_RATIO_MASK = + (int) 0x80000000 >> (__CLK_ANISOTROPIC_RATIO_BITS - 1) } clk_sampler_type; // Memory synchronization -#define CLK_LOCAL_MEM_FENCE (1 << 0) -#define CLK_GLOBAL_MEM_FENCE (1 << 1) +#define CLK_LOCAL_MEM_FENCE (1 << 0) +#define CLK_GLOBAL_MEM_FENCE (1 << 1) #endif // __CL_COMMON_DEFINES_H__ |