aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Holewinski <jholewinski@nvidia.com>2013-03-30 14:38:24 +0000
committerJustin Holewinski <jholewinski@nvidia.com>2013-03-30 14:38:24 +0000
commitdca8f336e6da2b50eb965535d81d603e39294f9c (patch)
tree6c898764f5eead3bf1fc49e74b28762e6f024bef
parentfe6b2713656c2d1bf559be357f0e0bf2d09bebd6 (diff)
Use kernel metadata to differentiate between kernel and device
functions for the NVPTX target. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@178418 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/TargetInfo.cpp52
-rw-r--r--test/CodeGenCUDA/ptx-kernels.cu10
-rw-r--r--test/CodeGenOpenCL/ptx-calls.cl7
-rw-r--r--test/CodeGenOpenCL/ptx-kernels.cl5
4 files changed, 39 insertions, 35 deletions
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 524c5b332f..931502d779 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -4017,7 +4017,7 @@ namespace {
class NVPTXABIInfo : public ABIInfo {
public:
- NVPTXABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) { setRuntimeCC(); }
+ NVPTXABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {}
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType Ty) const;
@@ -4025,8 +4025,6 @@ public:
virtual void computeInfo(CGFunctionInfo &FI) const;
virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
CodeGenFunction &CFG) const;
-private:
- void setRuntimeCC();
};
class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
@@ -4036,6 +4034,8 @@ public:
virtual void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const;
+private:
+ static void addKernelMetadata(llvm::Function *F);
};
ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
@@ -4066,25 +4066,6 @@ void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
FI.setEffectiveCallingConvention(getRuntimeCC());
}
-void NVPTXABIInfo::setRuntimeCC() {
- // Calling convention as default by an ABI.
- // We're still using the PTX_Kernel/PTX_Device calling conventions here,
- // but we should switch to NVVM metadata later on.
- const LangOptions &LangOpts = getContext().getLangOpts();
- if (LangOpts.OpenCL || LangOpts.CUDA) {
- // If we are in OpenCL or CUDA mode, then default to device functions
- RuntimeCC = llvm::CallingConv::PTX_Device;
- } else {
- // If we are in standard C/C++ mode, use the triple to decide on the default
- StringRef Env =
- getContext().getTargetInfo().getTriple().getEnvironmentName();
- if (Env == "device")
- RuntimeCC = llvm::CallingConv::PTX_Device;
- else
- RuntimeCC = llvm::CallingConv::PTX_Kernel;
- }
-}
-
llvm::Value *NVPTXABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
CodeGenFunction &CFG) const {
llvm_unreachable("NVPTX does not support varargs");
@@ -4100,11 +4081,11 @@ SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
// Perform special handling in OpenCL mode
if (M.getLangOpts().OpenCL) {
- // Use OpenCL function attributes to set proper calling conventions
+ // Use OpenCL function attributes to check for kernel functions
// By default, all functions are device functions
if (FD->hasAttr<OpenCLKernelAttr>()) {
- // OpenCL __kernel functions get a kernel calling convention
- F->setCallingConv(llvm::CallingConv::PTX_Kernel);
+ // OpenCL __kernel functions get kernel metadata
+ addKernelMetadata(F);
// And kernel functions are not subject to inlining
F->addFnAttr(llvm::Attribute::NoInline);
}
@@ -4112,14 +4093,31 @@ SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
// Perform special handling in CUDA mode.
if (M.getLangOpts().CUDA) {
- // CUDA __global__ functions get a kernel calling convention. Since
+ // CUDA __global__ functions get a kernel metadata entry. Since
// __global__ functions cannot be called from the device, we do not
// need to set the noinline attribute.
if (FD->getAttr<CUDAGlobalAttr>())
- F->setCallingConv(llvm::CallingConv::PTX_Kernel);
+ addKernelMetadata(F);
}
}
+void NVPTXTargetCodeGenInfo::addKernelMetadata(llvm::Function *F) {
+ llvm::Module *M = F->getParent();
+ llvm::LLVMContext &Ctx = M->getContext();
+
+ // Get "nvvm.annotations" metadata node
+ llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
+
+ // Create !{<func-ref>, metadata !"kernel", i32 1} node
+ llvm::SmallVector<llvm::Value *, 3> MDVals;
+ MDVals.push_back(F);
+ MDVals.push_back(llvm::MDString::get(Ctx, "kernel"));
+ MDVals.push_back(llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1));
+
+ // Append metadata to nvvm.annotations
+ MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
+}
+
}
//===----------------------------------------------------------------------===//
diff --git a/test/CodeGenCUDA/ptx-kernels.cu b/test/CodeGenCUDA/ptx-kernels.cu
index f0bf2952a1..8d34f4f3a6 100644
--- a/test/CodeGenCUDA/ptx-kernels.cu
+++ b/test/CodeGenCUDA/ptx-kernels.cu
@@ -2,11 +2,15 @@
#include "../SemaCUDA/cuda.h"
-// CHECK: define ptx_device{{.*}}device_function
+// CHECK: define void @device_function
+extern "C"
__device__ void device_function() {}
-// CHECK: define ptx_kernel{{.*}}global_function
+// CHECK: define void @global_function
+extern "C"
__global__ void global_function() {
- // CHECK: call ptx_device{{.*}}device_function
+ // CHECK: call void @device_function
device_function();
}
+
+// CHECK: !{{[0-9]+}} = metadata !{void ()* @global_function, metadata !"kernel", i32 1}
diff --git a/test/CodeGenOpenCL/ptx-calls.cl b/test/CodeGenOpenCL/ptx-calls.cl
index 34a21c6c1d..d9904513e5 100644
--- a/test/CodeGenOpenCL/ptx-calls.cl
+++ b/test/CodeGenOpenCL/ptx-calls.cl
@@ -2,11 +2,12 @@
void device_function() {
}
-// CHECK: define ptx_device void @device_function()
+// CHECK: define void @device_function()
__kernel void kernel_function() {
device_function();
}
-// CHECK: define ptx_kernel void @kernel_function()
-// CHECK: call ptx_device void @device_function()
+// CHECK: define void @kernel_function()
+// CHECK: call void @device_function()
+// CHECK: !{{[0-9]+}} = metadata !{void ()* @kernel_function, metadata !"kernel", i32 1}
diff --git a/test/CodeGenOpenCL/ptx-kernels.cl b/test/CodeGenOpenCL/ptx-kernels.cl
index 1d7e497b7c..07648e4015 100644
--- a/test/CodeGenOpenCL/ptx-kernels.cl
+++ b/test/CodeGenOpenCL/ptx-kernels.cl
@@ -2,9 +2,10 @@
void device_function() {
}
-// CHECK: define ptx_device void @device_function()
+// CHECK: define void @device_function()
__kernel void kernel_function() {
}
-// CHECK: define ptx_kernel void @kernel_function()
+// CHECK: define void @kernel_function()
+// CHECK: !{{[0-9]+}} = metadata !{void ()* @kernel_function, metadata !"kernel", i32 1}