aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/clang/Driver/CC1Options.td7
-rw-r--r--include/clang/Frontend/CodeGenOptions.h2
-rw-r--r--lib/CodeGen/CodeGenModule.cpp17
-rw-r--r--lib/Frontend/CompilerInvocation.cpp3
-rw-r--r--test/CodeGenCUDA/filter-decl.cu40
-rw-r--r--test/CodeGenCUDA/ptx-kernels.cu2
6 files changed, 70 insertions, 1 deletions
diff --git a/include/clang/Driver/CC1Options.td b/include/clang/Driver/CC1Options.td
index ffe1eafd61..ef83cbc148 100644
--- a/include/clang/Driver/CC1Options.td
+++ b/include/clang/Driver/CC1Options.td
@@ -714,3 +714,10 @@ def cl_mad_enable : Flag<"-cl-mad-enable">,
HelpText<"OpenCL only. Enable less precise MAD instructions to be generated.">;
def cl_std_EQ : Joined<"-cl-std=">,
HelpText<"OpenCL language standard to compile for">;
+
+//===----------------------------------------------------------------------===//
+// CUDA Options
+//===----------------------------------------------------------------------===//
+
+def fcuda_is_device : Flag<"-fcuda-is-device">,
+ HelpText<"Generate code for CUDA device">;
diff --git a/include/clang/Frontend/CodeGenOptions.h b/include/clang/Frontend/CodeGenOptions.h
index 5f824f2d71..4874c17c79 100644
--- a/include/clang/Frontend/CodeGenOptions.h
+++ b/include/clang/Frontend/CodeGenOptions.h
@@ -37,6 +37,7 @@ public:
unsigned AsmVerbose : 1; /// -dA, -fverbose-asm.
unsigned ObjCAutoRefCountExceptions : 1; /// Whether ARC should be EH-safe.
+ unsigned CUDAIsDevice : 1; /// Set when compiling for CUDA device.
unsigned CXAAtExit : 1; /// Use __cxa_atexit for calling destructors.
unsigned CXXCtorDtorAliases: 1; /// Emit complete ctors/dtors as linker
/// aliases to base ctors when possible.
@@ -143,6 +144,7 @@ public:
public:
CodeGenOptions() {
AsmVerbose = 0;
+ CUDAIsDevice = 0;
CXAAtExit = 1;
CXXCtorDtorAliases = 0;
DataSections = 0;
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index f5e37897bc..d8b9c9d853 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -785,6 +785,23 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
if (Global->hasAttr<AliasAttr>())
return EmitAliasDefinition(GD);
+ // If this is CUDA, be selective about which declarations we emit.
+ if (Features.CUDA) {
+ if (CodeGenOpts.CUDAIsDevice) {
+ if (!Global->hasAttr<CUDADeviceAttr>() &&
+ !Global->hasAttr<CUDAGlobalAttr>() &&
+ !Global->hasAttr<CUDAConstantAttr>() &&
+ !Global->hasAttr<CUDASharedAttr>())
+ return;
+ } else {
+ if (!Global->hasAttr<CUDAHostAttr>() && (
+ Global->hasAttr<CUDADeviceAttr>() ||
+ Global->hasAttr<CUDAConstantAttr>() ||
+ Global->hasAttr<CUDASharedAttr>()))
+ return;
+ }
+ }
+
// Ignore declarations, they will be emitted on their first use.
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(Global)) {
// Forward declarations are emitted lazily on first use.
diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp
index 4fb3ee62f0..b35bc65c7e 100644
--- a/lib/Frontend/CompilerInvocation.cpp
+++ b/lib/Frontend/CompilerInvocation.cpp
@@ -183,6 +183,8 @@ static void CodeGenOptsToArgs(const CodeGenOptions &Opts,
Res.push_back("-mcode-model");
Res.push_back(Opts.CodeModel);
}
+ if (Opts.CUDAIsDevice)
+ Res.push_back("-fcuda-is-device");
if (!Opts.CXAAtExit)
Res.push_back("-fno-use-cxa-atexit");
if (Opts.CXXCtorDtorAliases)
@@ -1028,6 +1030,7 @@ static void ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
Opts.ObjCAutoRefCountExceptions = Args.hasArg(OPT_fobjc_arc_exceptions);
Opts.ObjCRuntimeHasARC = Args.hasArg(OPT_fobjc_runtime_has_arc);
Opts.ObjCRuntimeHasTerminate = Args.hasArg(OPT_fobjc_runtime_has_terminate);
+ Opts.CUDAIsDevice = Args.hasArg(OPT_fcuda_is_device);
Opts.CXAAtExit = !Args.hasArg(OPT_fno_use_cxa_atexit);
Opts.CXXCtorDtorAliases = Args.hasArg(OPT_mconstructor_aliases);
Opts.CodeModel = Args.getLastArgValue(OPT_mcode_model);
diff --git a/test/CodeGenCUDA/filter-decl.cu b/test/CodeGenCUDA/filter-decl.cu
new file mode 100644
index 0000000000..b758632d12
--- /dev/null
+++ b/test/CodeGenCUDA/filter-decl.cu
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-HOST %s
+// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device | FileCheck -check-prefix=CHECK-DEVICE %s
+
+#include "../SemaCUDA/cuda.h"
+
+// CHECK-HOST-NOT: constantdata = global
+// CHECK-DEVICE: constantdata = global
+__constant__ char constantdata[256];
+
+// CHECK-HOST-NOT: devicedata = global
+// CHECK-DEVICE: devicedata = global
+__device__ char devicedata[256];
+
+// CHECK-HOST-NOT: shareddata = global
+// CHECK-DEVICE: shareddata = global
+__shared__ char shareddata[256];
+
+// CHECK-HOST: hostdata = global
+// CHECK-DEVICE-NOT: hostdata = global
+char hostdata[256];
+
+// CHECK-HOST: define{{.*}}implicithostonlyfunc
+// CHECK-DEVICE-NOT: define{{.*}}implicithostonlyfunc
+void implicithostonlyfunc(void) {}
+
+// CHECK-HOST: define{{.*}}explicithostonlyfunc
+// CHECK-DEVICE-NOT: define{{.*}}explicithostonlyfunc
+__host__ void explicithostonlyfunc(void) {}
+
+// CHECK-HOST-NOT: define{{.*}}deviceonlyfunc
+// CHECK-DEVICE: define{{.*}}deviceonlyfunc
+__device__ void deviceonlyfunc(void) {}
+
+// CHECK-HOST: define{{.*}}hostdevicefunc
+// CHECK-DEVICE: define{{.*}}hostdevicefunc
+__host__ __device__ void hostdevicefunc(void) {}
+
+// CHECK-HOST: define{{.*}}globalfunc
+// CHECK-DEVICE: define{{.*}}globalfunc
+__global__ void globalfunc(void) {}
diff --git a/test/CodeGenCUDA/ptx-kernels.cu b/test/CodeGenCUDA/ptx-kernels.cu
index 310fa2a276..ecca8519af 100644
--- a/test/CodeGenCUDA/ptx-kernels.cu
+++ b/test/CodeGenCUDA/ptx-kernels.cu
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -triple ptx32-unknown-unknown -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple ptx32-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s
#include "../SemaCUDA/cuda.h"