Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 3 additions & 7 deletions clang/lib/CodeGen/CGCXX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,13 +237,9 @@ void CodeGenModule::EmitDefinitionAsAlias(GlobalDecl AliasDecl,
SetCommonAttributes(AliasDecl, Alias);
}

// For an implicit __host__ __device__ destructor, this trap body is reachable
// only when a host-allocated object is destroyed on the device through the
// vtable. HIP documents that pattern as invalid: an object with virtual
// member functions constructed on the host cannot be destroyed on the device.
// Device-side construction either pulls the dtor in as an organic device
// caller (errors surface in Sema) or compiles cleanly (the real body is
// emitted, no trap).
// Invalid implicit H+D functions get a trap body when CodeGen still needs a
// device symbol, such as a vtable slot or explicit instantiation symbol.
// Organic device use surfaces the original Sema diagnostics instead.
bool CodeGenModule::tryEmitCUDADeviceInvalidFunctionBody(GlobalDecl GD,
llvm::Function *Fn) {
if (!getLangOpts().CUDAIsDevice)
Expand Down
13 changes: 7 additions & 6 deletions clang/lib/Sema/Sema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2103,14 +2103,15 @@ void Sema::emitDeferredDiags() {
ExternalSource->ReadDeclsToCheckForDeferredDiags(
DeclsToCheckForDeferredDiags);

// For each implicit-H+D-explicit-inst function with deferred errors but no
// organic device caller, drop the diagnostics and mark for a trap body.
auto ClassifyImplicitHDExplicitInst = [&]() {
// For selected implicit-H+D functions with deferred device errors but no
// organic device caller, drop diagnostics and mark a trap body if CodeGen
// still needs a device symbol.
auto ClassifyImplicitHDDeviceDiags = [&]() {
if (!LangOpts.CUDAIsDevice)
return;
for (auto &Pair : DeviceDeferredDiags) {
const FunctionDecl *FD = Pair.first;
if (!SemaCUDA::isImplicitHDExplicitInstantiation(FD))
if (!SemaCUDA::isImplicitHostDeviceFunction(FD))
continue;
if (CUDA().DeviceKnownEmittedFns.count(FD))
continue;
Expand All @@ -2129,14 +2130,14 @@ void Sema::emitDeferredDiags() {

if ((DeviceDeferredDiags.empty() && !LangOpts.OpenMP) ||
DeclsToCheckForDeferredDiags.empty()) {
ClassifyImplicitHDExplicitInst();
ClassifyImplicitHDDeviceDiags();
return;
}

DeferredDiagnosticsEmitter DDE(*this);
for (auto *D : DeclsToCheckForDeferredDiags)
DDE.checkRecordedDecl(D);
ClassifyImplicitHDExplicitInst();
ClassifyImplicitHDDeviceDiags();
DDE.emitCollectedDiags();
}

Expand Down
8 changes: 4 additions & 4 deletions clang/lib/Sema/SemaBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,12 @@ Sema::SemaDiagnosticBuilder SemaBase::Diag(SourceLocation Loc,
bool ShouldDefer = getLangOpts().CUDA && getLangOpts().GPUDeferDiag &&
DiagnosticIDs::isDeferrable(DiagID) &&
(SemaRef.DeferDiags || !IsError);
// Even without -fgpu-defer-diag, defer device-side errors inside an
// implicit-H+D explicit instantiation so end-of-TU classification can
// choose between surfacing them or emitting a trap body.
// Even without -fgpu-defer-diag, defer device-side errors inside selected
// implicit-H+D functions so end-of-TU classification can choose between
// surfacing them, discarding them, or emitting a trap body.
if (!ShouldDefer && getLangOpts().CUDA && getLangOpts().CUDAIsDevice &&
DiagnosticIDs::isDeferrable(DiagID) &&
SemaCUDA::isImplicitHDExplicitInstantiation(
SemaCUDA::isImplicitHostDeviceFunction(
SemaRef.getCurFunctionDecl(/*AllowLambda=*/true)))
ShouldDefer = true;
auto SetIsLastErrorImmediate = [&](bool Flag) {
Expand Down
38 changes: 18 additions & 20 deletions clang/lib/Sema/SemaCUDA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -926,7 +926,7 @@ SemaBase::SemaDiagnosticBuilder SemaCUDA::DiagIfDeviceCode(SourceLocation Loc,
if (SemaRef.IsLastErrorImmediate &&
getDiagnostics().getDiagnosticIDs()->isNote(DiagID))
return SemaDiagnosticBuilder::K_Immediate;
if (isImplicitHDExplicitInstantiation(CurFunContext))
if (isImplicitHostDeviceFunction(CurFunContext))
return SemaDiagnosticBuilder::K_Deferred;
return (SemaRef.getEmissionStatus(CurFunContext) ==
Sema::FunctionEmissionStatus::Emitted)
Expand Down Expand Up @@ -995,25 +995,23 @@ bool SemaCUDA::CheckCall(SourceLocation Loc, FunctionDecl *Callee) {
// Otherwise, mark the call in our call graph so we can traverse it later.
bool CallerKnownEmitted = SemaRef.getEmissionStatus(Caller) ==
Sema::FunctionEmissionStatus::Emitted;
bool CallerIsImplicitHDExplicitInst =
isImplicitHDExplicitInstantiation(Caller);
SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee,
CallerKnownEmitted,
CallerIsImplicitHDExplicitInst] {
switch (IdentifyPreference(Caller, Callee)) {
case CFP_Never:
case CFP_WrongSide:
assert(Caller && "Never/wrongSide calls require a non-null caller");
// If we know the caller will be emitted, we know this wrong-side call
// will be emitted, so it's an immediate error. Otherwise, defer the
// error until we know the caller is emitted.
return (CallerKnownEmitted && !CallerIsImplicitHDExplicitInst)
? SemaDiagnosticBuilder::K_ImmediateWithCallStack
: SemaDiagnosticBuilder::K_Deferred;
default:
return SemaDiagnosticBuilder::K_Nop;
}
}();
bool DeferImplicitHDDeviceDiag = isImplicitHostDeviceFunction(Caller);
SemaDiagnosticBuilder::Kind DiagKind =
[this, Caller, Callee, CallerKnownEmitted, DeferImplicitHDDeviceDiag] {
switch (IdentifyPreference(Caller, Callee)) {
case CFP_Never:
case CFP_WrongSide:
assert(Caller && "Never/wrongSide calls require a non-null caller");
// If we know the caller will be emitted, we know this wrong-side call
// will be emitted, so it's an immediate error. Otherwise, defer the
// error until we know the caller is emitted.
return (CallerKnownEmitted && !DeferImplicitHDDeviceDiag)
? SemaDiagnosticBuilder::K_ImmediateWithCallStack
: SemaDiagnosticBuilder::K_Deferred;
default:
return SemaDiagnosticBuilder::K_Nop;
}
}();

bool IsDeviceKernelCall = Callee == getASTContext().getcudaLaunchDeviceDecl();
bool CallerHD = Caller && Caller->hasAttr<CUDAHostAttr>() &&
Expand Down
74 changes: 74 additions & 0 deletions clang/test/SemaCUDA/implicit-hd-deferred-host-call-unused.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device \
// RUN: -foffload-implicit-host-device-templates -std=c++14 \
// RUN: -fsyntax-only -verify %s
// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device \
// RUN: -foffload-implicit-host-device-templates -std=c++14 \
// RUN: -fsyntax-only -verify %s

#include "Inputs/cuda.h"

__host__ constexpr int host_only_constexpr_unused() { return 1; }

constexpr int constexpr_unused(int x) {
return x + host_only_constexpr_unused();
}

extern "C" int host_only_template_unused();

template <typename T> int template_unused(T x) {
return x + host_only_template_unused();
}

extern "C" int host_only_forced_unused();

#pragma clang force_cuda_host_device begin
int forced_unused(int x) {
return x + host_only_forced_unused();
}
#pragma clang force_cuda_host_device end

void host_context() {
(void)constexpr_unused(1);
(void)template_unused(1);
(void)forced_unused(1);
}

__host__ constexpr int host_only_constexpr_used() { return 1; }
// expected-note@-1 {{'host_only_constexpr_used' declared here}}

constexpr int constexpr_used(int x) {
return x + host_only_constexpr_used();
// expected-error@-1 {{reference to __host__ function 'host_only_constexpr_used' in __host__ __device__ function}}
}

extern "C" int host_only_template_used();
// expected-note@-1 {{'host_only_template_used' declared here}}

template <typename T> int template_used(T x) {
return x + host_only_template_used();
// expected-error@-1 {{reference to __host__ function 'host_only_template_used' in __host__ __device__ function}}
}

extern "C" int host_only_forced_used();
// expected-note@-1 {{'host_only_forced_used' declared here}}

#pragma clang force_cuda_host_device begin
int forced_used(int x) {
return x + host_only_forced_used();
// expected-error@-1 {{reference to __host__ function 'host_only_forced_used' in __host__ __device__ function}}
}
#pragma clang force_cuda_host_device end

__device__ int device_caller() {
return constexpr_used(1) + template_used(1) + forced_used(1);
// expected-note@-1 {{called by 'device_caller'}}
// expected-note@-2 {{called by 'device_caller'}}
// expected-note@-3 {{called by 'device_caller'}}
}

__global__ void kernel(int *out) {
*out = device_caller();
// expected-note@-1 {{called by 'kernel'}}
// expected-note@-2 {{called by 'kernel'}}
// expected-note@-3 {{called by 'kernel'}}
}
59 changes: 59 additions & 0 deletions clang/test/SemaCUDA/implicit-hd-deferred-overload-unused.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device \
// RUN: -foffload-implicit-host-device-templates -std=c++14 \
// RUN: -fsyntax-only -verify %s
// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device \
// RUN: -foffload-implicit-host-device-templates -std=c++14 \
// RUN: -fsyntax-only -verify %s

#include "Inputs/cuda.h"

__host__ __device__ constexpr int pick_constexpr_unused(long);
__host__ __device__ constexpr int pick_constexpr_unused(unsigned long);

constexpr int constexpr_unused(int x) {
return pick_constexpr_unused(x);
}

__host__ __device__ constexpr int pick_constexpr_used(long);
// expected-note@-1 {{candidate function}}
__host__ __device__ constexpr int pick_constexpr_used(unsigned long);
// expected-note@-1 {{candidate function}}

constexpr int constexpr_used(int x) {
return pick_constexpr_used(x);
// expected-error@-1 {{call to 'pick_constexpr_used' is ambiguous}}
}

__host__ __device__ int pick_template_unused(long);
__host__ __device__ int pick_template_unused(unsigned long);

template <typename T> int template_unused(T x) {
return pick_template_unused(x);
}

void host_only() {
(void)constexpr_unused(1);
(void)template_unused(1);
}

__host__ __device__ int pick_template_used(long);
// expected-note@-1 {{candidate function}}
__host__ __device__ int pick_template_used(unsigned long);
// expected-note@-1 {{candidate function}}

template <typename T> int template_used(T x) {
return pick_template_used(x);
// expected-error@-1 {{call to 'pick_template_used' is ambiguous}}
}

__device__ int device_caller() {
return constexpr_used(1) + template_used(1);
// expected-note@-1 {{called by 'device_caller'}}
// expected-note@-2 {{called by 'device_caller'}}
}

__global__ void kernel(int *out) {
*out = device_caller();
// expected-note@-1 {{called by 'kernel'}}
// expected-note@-2 {{called by 'kernel'}}
}
Loading