diff options
author | Justin Holewinski <jholewinski@nvidia.com> | 2013-03-30 14:38:24 +0000 |
---|---|---|
committer | Justin Holewinski <jholewinski@nvidia.com> | 2013-03-30 14:38:24 +0000 |
commit | dca8f336e6da2b50eb965535d81d603e39294f9c (patch) | |
tree | 6c898764f5eead3bf1fc49e74b28762e6f024bef | |
parent | fe6b2713656c2d1bf559be357f0e0bf2d09bebd6 (diff) |
Use kernel metadata to differentiate between kernel and device
functions for the NVPTX target.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@178418 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/CodeGen/TargetInfo.cpp | 52 | ||||
-rw-r--r-- | test/CodeGenCUDA/ptx-kernels.cu | 10 | ||||
-rw-r--r-- | test/CodeGenOpenCL/ptx-calls.cl | 7 | ||||
-rw-r--r-- | test/CodeGenOpenCL/ptx-kernels.cl | 5 |
4 files changed, 39 insertions, 35 deletions
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index 524c5b332f..931502d779 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -4017,7 +4017,7 @@ namespace { class NVPTXABIInfo : public ABIInfo { public: - NVPTXABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) { setRuntimeCC(); } + NVPTXABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType Ty) const; @@ -4025,8 +4025,6 @@ public: virtual void computeInfo(CGFunctionInfo &FI) const; virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, CodeGenFunction &CFG) const; -private: - void setRuntimeCC(); }; class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo { @@ -4036,6 +4034,8 @@ public: virtual void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const; +private: + static void addKernelMetadata(llvm::Function *F); }; ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const { @@ -4066,25 +4066,6 @@ void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const { FI.setEffectiveCallingConvention(getRuntimeCC()); } -void NVPTXABIInfo::setRuntimeCC() { - // Calling convention as default by an ABI. - // We're still using the PTX_Kernel/PTX_Device calling conventions here, - // but we should switch to NVVM metadata later on. - const LangOptions &LangOpts = getContext().getLangOpts(); - if (LangOpts.OpenCL || LangOpts.CUDA) { - // If we are in OpenCL or CUDA mode, then default to device functions - RuntimeCC = llvm::CallingConv::PTX_Device; - } else { - // If we are in standard C/C++ mode, use the triple to decide on the default - StringRef Env = - getContext().getTargetInfo().getTriple().getEnvironmentName(); - if (Env == "device") - RuntimeCC = llvm::CallingConv::PTX_Device; - else - RuntimeCC = llvm::CallingConv::PTX_Kernel; - } -} - llvm::Value *NVPTXABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, CodeGenFunction &CFG) const { llvm_unreachable("NVPTX does not support varargs"); @@ -4100,11 +4081,11 @@ SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, // Perform special handling in OpenCL mode if (M.getLangOpts().OpenCL) { - // Use OpenCL function attributes to set proper calling conventions + // Use OpenCL function attributes to check for kernel functions // By default, all functions are device functions if (FD->hasAttr<OpenCLKernelAttr>()) { - // OpenCL __kernel functions get a kernel calling convention - F->setCallingConv(llvm::CallingConv::PTX_Kernel); + // OpenCL __kernel functions get kernel metadata + addKernelMetadata(F); // And kernel functions are not subject to inlining F->addFnAttr(llvm::Attribute::NoInline); } @@ -4112,14 +4093,31 @@ SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, // Perform special handling in CUDA mode. if (M.getLangOpts().CUDA) { - // CUDA __global__ functions get a kernel calling convention. Since + // CUDA __global__ functions get a kernel metadata entry. Since // __global__ functions cannot be called from the device, we do not // need to set the noinline attribute. if (FD->getAttr<CUDAGlobalAttr>()) - F->setCallingConv(llvm::CallingConv::PTX_Kernel); + addKernelMetadata(F); } } +void NVPTXTargetCodeGenInfo::addKernelMetadata(llvm::Function *F) { + llvm::Module *M = F->getParent(); + llvm::LLVMContext &Ctx = M->getContext(); + + // Get "nvvm.annotations" metadata node + llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); + + // Create !{<func-ref>, metadata !"kernel", i32 1} node + llvm::SmallVector<llvm::Value *, 3> MDVals; + MDVals.push_back(F); + MDVals.push_back(llvm::MDString::get(Ctx, "kernel")); + MDVals.push_back(llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1)); + + // Append metadata to nvvm.annotations + MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); +} + } //===----------------------------------------------------------------------===// diff --git a/test/CodeGenCUDA/ptx-kernels.cu b/test/CodeGenCUDA/ptx-kernels.cu index f0bf2952a1..8d34f4f3a6 100644 --- a/test/CodeGenCUDA/ptx-kernels.cu +++ b/test/CodeGenCUDA/ptx-kernels.cu @@ -2,11 +2,15 @@ #include "../SemaCUDA/cuda.h" -// CHECK: define ptx_device{{.*}}device_function +// CHECK: define void @device_function +extern "C" __device__ void device_function() {} -// CHECK: define ptx_kernel{{.*}}global_function +// CHECK: define void @global_function +extern "C" __global__ void global_function() { - // CHECK: call ptx_device{{.*}}device_function + // CHECK: call void @device_function device_function(); } + +// CHECK: !{{[0-9]+}} = metadata !{void ()* @global_function, metadata !"kernel", i32 1} diff --git a/test/CodeGenOpenCL/ptx-calls.cl b/test/CodeGenOpenCL/ptx-calls.cl index 34a21c6c1d..d9904513e5 100644 --- a/test/CodeGenOpenCL/ptx-calls.cl +++ b/test/CodeGenOpenCL/ptx-calls.cl @@ -2,11 +2,12 @@ void device_function() { } -// CHECK: define ptx_device void @device_function() +// CHECK: define void @device_function() __kernel void kernel_function() { device_function(); } -// CHECK: define ptx_kernel void @kernel_function() -// CHECK: call ptx_device void @device_function() +// CHECK: define void @kernel_function() +// CHECK: call void @device_function() +// CHECK: !{{[0-9]+}} = metadata !{void ()* @kernel_function, metadata !"kernel", i32 1} diff --git a/test/CodeGenOpenCL/ptx-kernels.cl b/test/CodeGenOpenCL/ptx-kernels.cl index 1d7e497b7c..07648e4015 100644 --- a/test/CodeGenOpenCL/ptx-kernels.cl +++ b/test/CodeGenOpenCL/ptx-kernels.cl @@ -2,9 +2,10 @@ void device_function() { } -// CHECK: define ptx_device void @device_function() +// CHECK: define void @device_function() __kernel void kernel_function() { } -// CHECK: define ptx_kernel void @kernel_function() +// CHECK: define void @kernel_function() +// CHECK: !{{[0-9]+}} = metadata !{void ()* @kernel_function, metadata !"kernel", i32 1} |