diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index 3e77a74c7c009..acc6bb6581d85 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -126,6 +126,14 @@ enum class CudaArch {
   HIPDefault = CudaArch::GFX906,
 };

+enum class CUDAFunctionTarget {
+  Device,
+  Global,
+  Host,
+  HostDevice,
+  InvalidTarget
+};
+
 static inline bool IsNVIDIAGpuArch(CudaArch A) {
   return A >= CudaArch::SM_20 && A < CudaArch::GFX600;
 }
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 00888b7f7a738..6b9789334811e 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -38,6 +38,7 @@
 #include "clang/AST/TypeOrdering.h"
 #include "clang/Basic/BitmaskEnum.h"
 #include "clang/Basic/Builtins.h"
+#include "clang/Basic/Cuda.h"
 #include "clang/Basic/DarwinSDKInfo.h"
 #include "clang/Basic/ExpressionTraits.h"
 #include "clang/Basic/Module.h"
@@ -183,6 +184,7 @@ class Preprocessor;
 class PseudoDestructorTypeStorage;
 class PseudoObjectExpr;
 class QualType;
+class SemaCUDA;
 class SemaHLSL;
 class SemaOpenACC;
 class SemaSYCL;
@@ -435,14 +437,6 @@ enum class CXXSpecialMemberKind {
   Invalid
 };

-enum class CUDAFunctionTarget {
-  Device,
-  Global,
-  Host,
-  HostDevice,
-  InvalidTarget
-};
-
 /// Sema - This implements semantic analysis and AST building for C.
 /// \nosubgrouping
 class Sema final : public SemaBase {
@@ -486,8 +480,7 @@ class Sema final : public SemaBase {
   // 35. Code Completion (SemaCodeComplete.cpp)
   // 36. FixIt Helpers (SemaFixItUtils.cpp)
   // 37. Name Lookup for RISC-V Vector Intrinsic (SemaRISCVVectorLookup.cpp)
-  // 38. CUDA (SemaCUDA.cpp)
-  // 39. OpenMP Directives and Clauses (SemaOpenMP.cpp)
+  // 38. OpenMP Directives and Clauses (SemaOpenMP.cpp)

   /// \name Semantic Analysis
   /// Implementations are in Sema.cpp
@@ -981,9 +974,19 @@ class Sema final : public SemaBase {
     return DelayedDiagnostics.push(pool);
   }

+  /// Diagnostics that are emitted only if we discover that the given function
+  /// must be codegen'ed. Because handling these correctly adds overhead to
+  /// compilation, this is currently only enabled for CUDA compilations.
+  SemaDiagnosticBuilder::DeferredDiagnosticsType DeviceDeferredDiags;
+
   /// CurContext - This is the current declaration context of parsing.
   DeclContext *CurContext;

+  SemaCUDA &CUDA() {
+    assert(CUDAPtr);
+    return *CUDAPtr;
+  }
+
   SemaHLSL &HLSL() {
     assert(HLSLPtr);
     return *HLSLPtr;
@@ -1029,6 +1032,7 @@ class Sema final : public SemaBase {

   mutable IdentifierInfo *Ident_super;

+  std::unique_ptr<SemaCUDA> CUDAPtr;
   std::unique_ptr<SemaHLSL> HLSLPtr;
   std::unique_ptr<SemaOpenACC> OpenACCPtr;
   std::unique_ptr<SemaSYCL> SYCLPtr;
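To ground the enum that just moved into Basic/Cuda.h, here is a minimal sketch (illustrative only, not part of the patch) of how the five CUDAFunctionTarget values map onto source-level attributes; InvalidTarget is reserved for contradictory attribute combinations:

__global__ void kernel();          // CUDAFunctionTarget::Global
__device__ int device_fn();        // CUDAFunctionTarget::Device
__host__ int host_fn();            // CUDAFunctionTarget::Host (also the
                                   //   default for unattributed functions)
__host__ __device__ int hd_fn();   // CUDAFunctionTarget::HostDevice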
@@ -12908,258 +12912,6 @@ class Sema final : public SemaBase {
   //
   //

-  /// \name CUDA
-  /// Implementations are in SemaCUDA.cpp
-  ///@{
-
-public:
-  /// Increments our count of the number of times we've seen a pragma forcing
-  /// functions to be __host__ __device__. So long as this count is greater
-  /// than zero, all functions encountered will be __host__ __device__.
-  void PushForceCUDAHostDevice();
-
-  /// Decrements our count of the number of times we've seen a pragma forcing
-  /// functions to be __host__ __device__. Returns false if the count is 0
-  /// before incrementing, so you can emit an error.
-  bool PopForceCUDAHostDevice();
-
-  ExprResult ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
-                                     MultiExprArg ExecConfig,
-                                     SourceLocation GGGLoc);
-
-  /// Diagnostics that are emitted only if we discover that the given function
-  /// must be codegen'ed. Because handling these correctly adds overhead to
-  /// compilation, this is currently only enabled for CUDA compilations.
-  SemaDiagnosticBuilder::DeferredDiagnosticsType DeviceDeferredDiags;
-
-  /// A pair of a canonical FunctionDecl and a SourceLocation. When used as the
-  /// key in a hashtable, both the FD and location are hashed.
-  struct FunctionDeclAndLoc {
-    CanonicalDeclPtr<FunctionDecl> FD;
-    SourceLocation Loc;
-  };
-
-  /// FunctionDecls and SourceLocations for which CheckCUDACall has emitted a
-  /// (maybe deferred) "bad call" diagnostic. We use this to avoid emitting the
-  /// same deferred diag twice.
-  llvm::DenseSet<FunctionDeclAndLoc> LocsWithCUDACallDiags;
-
-  /// An inverse call graph, mapping known-emitted functions to one of their
-  /// known-emitted callers (plus the location of the call).
-  ///
-  /// Functions that we can tell a priori must be emitted aren't added to this
-  /// map.
-  llvm::DenseMap</* Callee = */ CanonicalDeclPtr<FunctionDecl>,
-                 /* Caller = */ FunctionDeclAndLoc>
-      DeviceKnownEmittedFns;
-
-  /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current
-  /// context is "used as device code".
-  ///
-  /// - If CurContext is a __host__ function, does not emit any diagnostics
-  ///   unless \p EmitOnBothSides is true.
-  /// - If CurContext is a __device__ or __global__ function, emits the
-  ///   diagnostics immediately.
-  /// - If CurContext is a __host__ __device__ function and we are compiling for
-  ///   the device, creates a diagnostic which is emitted if and when we realize
-  ///   that the function will be codegen'ed.
-  ///
-  /// Example usage:
-  ///
-  ///  // Variable-length arrays are not allowed in CUDA device code.
-  ///  if (CUDADiagIfDeviceCode(Loc, diag::err_cuda_vla)
-  ///      << llvm::to_underlying(CurrentCUDATarget()))
-  ///    return ExprError();
-  ///  // Otherwise, continue parsing as normal.
-  SemaDiagnosticBuilder CUDADiagIfDeviceCode(SourceLocation Loc,
-                                             unsigned DiagID);
-
-  /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current
-  /// context is "used as host code".
-  ///
-  /// Same as CUDADiagIfDeviceCode, with "host" and "device" switched.
-  SemaDiagnosticBuilder CUDADiagIfHostCode(SourceLocation Loc, unsigned DiagID);
-
-  /// Determines whether the given function is a CUDA device/host/kernel/etc.
-  /// function.
-  ///
-  /// Use this rather than examining the function's attributes yourself -- you
-  /// will get it wrong. Returns CUDAFunctionTarget::Host if D is null.
-  CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D,
-                                        bool IgnoreImplicitHDAttr = false);
-  CUDAFunctionTarget IdentifyCUDATarget(const ParsedAttributesView &Attrs);
-
-  enum CUDAVariableTarget {
-    CVT_Device,  /// Emitted on device side with a shadow variable on host side
-    CVT_Host,    /// Emitted on host side only
-    CVT_Both,    /// Emitted on both sides with different addresses
-    CVT_Unified, /// Emitted as a unified address, e.g. managed variables
-  };
-  /// Determines whether the given variable is emitted on host or device side.
-  CUDAVariableTarget IdentifyCUDATarget(const VarDecl *D);
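A companion sketch for the four CUDAVariableTarget cases (illustrative; the CVT_Both line relies on the implicit-constant promotion performed by MaybeAddCUDAConstantAttr):

__device__ int DevVar;       // CVT_Device: device copy plus a host-side shadow
__constant__ int ConstVar;   // CVT_Device
int HostVar;                 // CVT_Host: emitted on the host side only
__managed__ int Managed;     // CVT_Unified: one address valid on both sides
constexpr int Both = 42;     // CVT_Both: const/constexpr with only an implicit
                             //   constant attribute is emitted on both sides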
-
-  /// Defines kinds of CUDA global host/device context where a function may be
-  /// called.
-  enum CUDATargetContextKind {
-    CTCK_Unknown,       /// Unknown context
-    CTCK_InitGlobalVar, /// Function called during global variable
-                        /// initialization
-  };
-
-  /// Define the current global CUDA host/device context where a function may be
-  /// called. Only used when a function is called outside of any functions.
-  struct CUDATargetContext {
-    CUDAFunctionTarget Target = CUDAFunctionTarget::HostDevice;
-    CUDATargetContextKind Kind = CTCK_Unknown;
-    Decl *D = nullptr;
-  } CurCUDATargetCtx;
-
-  struct CUDATargetContextRAII {
-    Sema &S;
-    CUDATargetContext SavedCtx;
-    CUDATargetContextRAII(Sema &S_, CUDATargetContextKind K, Decl *D);
-    ~CUDATargetContextRAII() { S.CurCUDATargetCtx = SavedCtx; }
-  };
-
-  /// Gets the CUDA target for the current context.
-  CUDAFunctionTarget CurrentCUDATarget() {
-    return IdentifyCUDATarget(dyn_cast<FunctionDecl>(CurContext));
-  }
-
-  static bool isCUDAImplicitHostDeviceFunction(const FunctionDecl *D);
-
-  // CUDA function call preference. Must be ordered numerically from
-  // worst to best.
-  enum CUDAFunctionPreference {
-    CFP_Never,      // Invalid caller/callee combination.
-    CFP_WrongSide,  // Calls from host-device to host or device
-                    // function that do not match current compilation
-                    // mode.
-    CFP_HostDevice, // Any calls to host/device functions.
-    CFP_SameSide,   // Calls from host-device to host or device
-                    // function matching current compilation mode.
-    CFP_Native,     // host-to-host or device-to-device calls.
-  };
-
-  /// Identifies relative preference of a given Caller/Callee
-  /// combination, based on their host/device attributes.
-  /// \param Caller function which needs address of \p Callee.
-  ///               nullptr in case of global context.
-  /// \param Callee target function
-  ///
-  /// \returns preference value for particular Caller/Callee combination.
-  CUDAFunctionPreference IdentifyCUDAPreference(const FunctionDecl *Caller,
-                                                const FunctionDecl *Callee);
-
-  /// Determines whether Caller may invoke Callee, based on their CUDA
-  /// host/device attributes. Returns false if the call is not allowed.
-  ///
-  /// Note: Will return true for CFP_WrongSide calls. These may appear in
-  /// semantically correct CUDA programs, but only if they're never codegen'ed.
-  bool IsAllowedCUDACall(const FunctionDecl *Caller,
-                         const FunctionDecl *Callee) {
-    return IdentifyCUDAPreference(Caller, Callee) != CFP_Never;
-  }
-
-  /// May add implicit CUDAHostAttr and CUDADeviceAttr attributes to FD,
-  /// depending on FD and the current compilation settings.
-  void maybeAddCUDAHostDeviceAttrs(FunctionDecl *FD,
-                                   const LookupResult &Previous);
-
-  /// May add implicit CUDAConstantAttr attribute to VD, depending on VD
-  /// and current compilation settings.
-  void MaybeAddCUDAConstantAttr(VarDecl *VD);
-
-  /// Check whether we're allowed to call Callee from the current context.
-  ///
-  /// - If the call is never allowed in a semantically-correct program
-  ///   (CFP_Never), emits an error and returns false.
-  ///
-  /// - If the call is allowed in semantically-correct programs, but only if
-  ///   it's never codegen'ed (CFP_WrongSide), creates a deferred diagnostic to
-  ///   be emitted if and when the caller is codegen'ed, and returns true.
-  ///
-  ///   Will only create deferred diagnostics for a given SourceLocation once,
-  ///   so you can safely call this multiple times without generating duplicate
-  ///   deferred errors.
-  ///
-  /// - Otherwise, returns true without emitting any diagnostics.
-  bool CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee);
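A sketch of what the preference ladder means for CheckCUDACall in practice, assuming device-side compilation (-fcuda-is-device):

__device__ int dev();
__host__ int host();
__device__ void d_caller() {
  dev();   // CFP_Native: best case, no diagnostic
  host();  // CFP_Never: immediate error
}
__host__ __device__ void hd_caller() {
  dev();   // CFP_SameSide: allowed
  host();  // CFP_WrongSide: deferred error, emitted only if hd_caller()
           //   is ever codegen'ed for the device
}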
-
-  void CUDACheckLambdaCapture(CXXMethodDecl *D, const sema::Capture &Capture);
-
-  /// Set __device__ or __host__ __device__ attributes on the given lambda
-  /// operator() method.
-  ///
-  /// CUDA lambdas by default is host device function unless it has explicit
-  /// host or device attribute.
-  void CUDASetLambdaAttrs(CXXMethodDecl *Method);
-
-  /// Record \p FD if it is a CUDA/HIP implicit host device function used on
-  /// device side in device compilation.
-  void CUDARecordImplicitHostDeviceFuncUsedByDevice(const FunctionDecl *FD);
-
-  /// Finds a function in \p Matches with highest calling priority
-  /// from \p Caller context and erases all functions with lower
-  /// calling priority.
-  void EraseUnwantedCUDAMatches(
-      const FunctionDecl *Caller,
-      SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches);
-
-  /// Given a implicit special member, infer its CUDA target from the
-  /// calls it needs to make to underlying base/field special members.
-  /// \param ClassDecl the class for which the member is being created.
-  /// \param CSM the kind of special member.
-  /// \param MemberDecl the special member itself.
-  /// \param ConstRHS true if this is a copy operation with a const object on
-  ///        its RHS.
-  /// \param Diagnose true if this call should emit diagnostics.
-  /// \return true if there was an error inferring.
-  /// The result of this call is implicit CUDA target attribute(s) attached to
-  /// the member declaration.
-  bool inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
-                                               CXXSpecialMemberKind CSM,
-                                               CXXMethodDecl *MemberDecl,
-                                               bool ConstRHS, bool Diagnose);
-
-  /// \return true if \p CD can be considered empty according to CUDA
-  /// (E.2.3.1 in CUDA 7.5 Programming guide).
-  bool isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD);
-  bool isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *CD);
-
-  // \brief Checks that initializers of \p Var satisfy CUDA restrictions. In
-  // case of error emits appropriate diagnostic and invalidates \p Var.
-  //
-  // \details CUDA allows only empty constructors as initializers for global
-  // variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all
-  // __shared__ variables whether they are local or not (they all are implicitly
-  // static in CUDA). One exception is that CUDA allows constant initializers
-  // for __constant__ and __device__ variables.
-  void checkAllowedCUDAInitializer(VarDecl *VD);
-
-  /// Check whether NewFD is a valid overload for CUDA. Emits
-  /// diagnostics and invalidates NewFD if not.
-  void checkCUDATargetOverload(FunctionDecl *NewFD,
-                               const LookupResult &Previous);
-  /// Copies target attributes from the template TD to the function FD.
-  void inheritCUDATargetAttrs(FunctionDecl *FD, const FunctionTemplateDecl &TD);
-
-  /// Returns the name of the launch configuration function. This is the name
-  /// of the function that will be called to configure kernel call, with the
-  /// parameters specified via <<<>>>.
-  std::string getCudaConfigureFuncName() const;
-
-private:
-  unsigned ForceCUDAHostDeviceDepth = 0;
-
-  ///@}
-
-  //
-  //
-  // -------------------------------------------------------------------------
-  //
-  //
-
   /// \name OpenMP Directives and Clauses
   /// Implementations are in SemaOpenMP.cpp
   ///@{
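A sketch of the initializer restrictions checkAllowedCUDAInitializer enforces per the E.2.3.1 rules cited above (type names are illustrative):

struct Trivial { Trivial() = default; };
struct Dynamic { Dynamic(); };        // defined out of line, has side effects

__device__ Trivial A;                 // OK: empty constructor
__device__ int B = 42;                // OK: constant initializer
__device__ Dynamic C;                 // error: dynamic initialization

__global__ void k() {
  __shared__ Dynamic D;               // error: __shared__ variables follow the
}                                     //   same empty-constructor rule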
@@ -14546,32 +14298,4 @@ std::unique_ptr<sema::RISCVIntrinsicManager>
 CreateRISCVIntrinsicManager(Sema &S);
 } // end namespace clang

-namespace llvm {
-// Hash a FunctionDeclAndLoc by looking at both its FunctionDecl and its
-// SourceLocation.
-template <> struct DenseMapInfo<clang::Sema::FunctionDeclAndLoc> {
-  using FunctionDeclAndLoc = clang::Sema::FunctionDeclAndLoc;
-  using FDBaseInfo =
-      DenseMapInfo<clang::CanonicalDeclPtr<clang::FunctionDecl>>;
-
-  static FunctionDeclAndLoc getEmptyKey() {
-    return {FDBaseInfo::getEmptyKey(), clang::SourceLocation()};
-  }
-
-  static FunctionDeclAndLoc getTombstoneKey() {
-    return {FDBaseInfo::getTombstoneKey(), clang::SourceLocation()};
-  }
-
-  static unsigned getHashValue(const FunctionDeclAndLoc &FDL) {
-    return hash_combine(FDBaseInfo::getHashValue(FDL.FD),
-                        FDL.Loc.getHashValue());
-  }
-
-  static bool isEqual(const FunctionDeclAndLoc &LHS,
-                      const FunctionDeclAndLoc &RHS) {
-    return LHS.FD == RHS.FD && LHS.Loc == RHS.Loc;
-  }
-};
-} // namespace llvm
-
 #endif
diff --git a/clang/include/clang/Sema/SemaBase.h b/clang/include/clang/Sema/SemaBase.h
index ff718022fca03..3220f71dd797e 100644
--- a/clang/include/clang/Sema/SemaBase.h
+++ b/clang/include/clang/Sema/SemaBase.h
@@ -146,7 +146,7 @@ class SemaBase {
   ///   if (SemaDiagnosticBuilder(...) << foo << bar)
   ///     return ExprError();
   ///
-  /// But see CUDADiagIfDeviceCode() and CUDADiagIfHostCode() -- you probably
+  /// But see DiagIfDeviceCode() and DiagIfHostCode() -- you probably
   /// want to use these instead of creating a SemaDiagnosticBuilder yourself.
   operator bool() const { return isImmediate(); }

diff --git a/clang/include/clang/Sema/SemaCUDA.h b/clang/include/clang/Sema/SemaCUDA.h
new file mode 100644
index 0000000000000..63dc3f4da240b
--- /dev/null
+++ b/clang/include/clang/Sema/SemaCUDA.h
@@ -0,0 +1,304 @@
+//===----- SemaCUDA.h ----- Semantic Analysis for CUDA constructs ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares semantic analysis for CUDA constructs.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_SEMA_SEMACUDA_H
+#define LLVM_CLANG_SEMA_SEMACUDA_H
+
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/Redeclarable.h"
+#include "clang/Basic/Cuda.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Sema/Lookup.h"
+#include "clang/Sema/Ownership.h"
+#include "clang/Sema/ParsedAttr.h"
+#include "clang/Sema/Scope.h"
+#include "clang/Sema/ScopeInfo.h"
+#include "clang/Sema/SemaBase.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include <string>
+
+namespace clang {
+
+enum class CUDAFunctionTarget;
+
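For context, a sketch of the pragma whose nesting depth the Push/PopForceHostDevice counters declared below track; between begin and end, every function the parser sees becomes __host__ __device__, and an unmatched end is what turns PopForceHostDevice()'s false return into err_pragma_cannot_end_force_cuda_host_device:

#pragma clang force_cuda_host_device begin
inline int promoted(int x) { return x + 1; }  // implicitly __host__ __device__
#pragma clang force_cuda_host_device end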
+class SemaCUDA : public SemaBase {
+public:
+  SemaCUDA(Sema &S);
+
+  /// Increments our count of the number of times we've seen a pragma forcing
+  /// functions to be __host__ __device__. So long as this count is greater
+  /// than zero, all functions encountered will be __host__ __device__.
+  void PushForceHostDevice();
+
+  /// Decrements our count of the number of times we've seen a pragma forcing
+  /// functions to be __host__ __device__. Returns false if the count is 0
+  /// before incrementing, so you can emit an error.
+  bool PopForceHostDevice();
+
+  ExprResult ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc,
+                                 MultiExprArg ExecConfig,
+                                 SourceLocation GGGLoc);
+
+  /// A pair of a canonical FunctionDecl and a SourceLocation. When used as the
+  /// key in a hashtable, both the FD and location are hashed.
+  struct FunctionDeclAndLoc {
+    CanonicalDeclPtr<FunctionDecl> FD;
+    SourceLocation Loc;
+  };
+
+  /// FunctionDecls and SourceLocations for which CheckCall has emitted a
+  /// (maybe deferred) "bad call" diagnostic. We use this to avoid emitting the
+  /// same deferred diag twice.
+  llvm::DenseSet<FunctionDeclAndLoc> LocsWithCUDACallDiags;
+
+  /// An inverse call graph, mapping known-emitted functions to one of their
+  /// known-emitted callers (plus the location of the call).
+  ///
+  /// Functions that we can tell a priori must be emitted aren't added to this
+  /// map.
+  llvm::DenseMap</* Callee = */ CanonicalDeclPtr<FunctionDecl>,
+                 /* Caller = */ FunctionDeclAndLoc>
+      DeviceKnownEmittedFns;
+
+  /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current
+  /// context is "used as device code".
+  ///
+  /// - If CurContext is a __host__ function, does not emit any diagnostics
+  ///   unless \p EmitOnBothSides is true.
+  /// - If CurContext is a __device__ or __global__ function, emits the
+  ///   diagnostics immediately.
+  /// - If CurContext is a __host__ __device__ function and we are compiling for
+  ///   the device, creates a diagnostic which is emitted if and when we realize
+  ///   that the function will be codegen'ed.
+  ///
+  /// Example usage:
+  ///
+  ///  // Variable-length arrays are not allowed in CUDA device code.
+  ///  if (DiagIfDeviceCode(Loc, diag::err_cuda_vla) << CurrentTarget())
+  ///    return ExprError();
+  ///  // Otherwise, continue parsing as normal.
+  SemaDiagnosticBuilder DiagIfDeviceCode(SourceLocation Loc, unsigned DiagID);
+
+  /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current
+  /// context is "used as host code".
+  ///
+  /// Same as DiagIfDeviceCode, with "host" and "device" switched.
+  SemaDiagnosticBuilder DiagIfHostCode(SourceLocation Loc, unsigned DiagID);
+
+  /// Determines whether the given function is a CUDA device/host/kernel/etc.
+  /// function.
+  ///
+  /// Use this rather than examining the function's attributes yourself -- you
+  /// will get it wrong. Returns CUDAFunctionTarget::Host if D is null.
+  CUDAFunctionTarget IdentifyTarget(const FunctionDecl *D,
+                                    bool IgnoreImplicitHDAttr = false);
+  CUDAFunctionTarget IdentifyTarget(const ParsedAttributesView &Attrs);
+
+  enum CUDAVariableTarget {
+    CVT_Device,  /// Emitted on device side with a shadow variable on host side
+    CVT_Host,    /// Emitted on host side only
+    CVT_Both,    /// Emitted on both sides with different addresses
+    CVT_Unified, /// Emitted as a unified address, e.g. managed variables
+  };
+  /// Determines whether the given variable is emitted on host or device side.
+  CUDAVariableTarget IdentifyTarget(const VarDecl *D);
+
+  /// Defines kinds of CUDA global host/device context where a function may be
+  /// called.
+  enum CUDATargetContextKind {
+    CTCK_Unknown,       /// Unknown context
+    CTCK_InitGlobalVar, /// Function called during global variable
+                        /// initialization
+  };
+
+  /// Define the current global CUDA host/device context where a function may be
+  /// called. Only used when a function is called outside of any functions.
+  struct CUDATargetContext {
+    CUDAFunctionTarget Target = CUDAFunctionTarget::HostDevice;
+    CUDATargetContextKind Kind = CTCK_Unknown;
+    Decl *D = nullptr;
+  } CurCUDATargetCtx;
+
+  struct CUDATargetContextRAII {
+    SemaCUDA &S;
+    SemaCUDA::CUDATargetContext SavedCtx;
+    CUDATargetContextRAII(SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K,
+                          Decl *D);
+    ~CUDATargetContextRAII() { S.CurCUDATargetCtx = SavedCtx; }
+  };
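A sketch of the situation CUDATargetContextRAII models: in the initializer of a device-side global there is no enclosing FunctionDecl, so target checking falls back to CurCUDATargetCtx (names illustrative; the initializer itself remains subject to the allowed-initializer checks):

__device__ int seed();        // a device function
__device__ int G = seed();    // while parsing this initializer, Sema pushes a
                              //   {Device, CTCK_InitGlobalVar, G} context, so
                              //   the call to seed() is checked as device code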
+
+  /// Gets the CUDA target for the current context.
+  CUDAFunctionTarget CurrentTarget() {
+    return IdentifyTarget(dyn_cast<FunctionDecl>(SemaRef.CurContext));
+  }
+
+  static bool isImplicitHostDeviceFunction(const FunctionDecl *D);
+
+  // CUDA function call preference. Must be ordered numerically from
+  // worst to best.
+  enum CUDAFunctionPreference {
+    CFP_Never,      // Invalid caller/callee combination.
+    CFP_WrongSide,  // Calls from host-device to host or device
+                    // function that do not match current compilation
+                    // mode.
+    CFP_HostDevice, // Any calls to host/device functions.
+    CFP_SameSide,   // Calls from host-device to host or device
+                    // function matching current compilation mode.
+    CFP_Native,     // host-to-host or device-to-device calls.
+  };
+
+  /// Identifies relative preference of a given Caller/Callee
+  /// combination, based on their host/device attributes.
+  /// \param Caller function which needs address of \p Callee.
+  ///               nullptr in case of global context.
+  /// \param Callee target function
+  ///
+  /// \returns preference value for particular Caller/Callee combination.
+  CUDAFunctionPreference IdentifyPreference(const FunctionDecl *Caller,
+                                            const FunctionDecl *Callee);
+
+  /// Determines whether Caller may invoke Callee, based on their CUDA
+  /// host/device attributes. Returns false if the call is not allowed.
+  ///
+  /// Note: Will return true for CFP_WrongSide calls. These may appear in
+  /// semantically correct CUDA programs, but only if they're never codegen'ed.
+  bool IsAllowedCall(const FunctionDecl *Caller, const FunctionDecl *Callee) {
+    return IdentifyPreference(Caller, Callee) != CFP_Never;
+  }
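Because the enum is ordered from worst to best, overload resolution can rank candidates by preference. A sketch of target-based overloading, which CUDA permits on otherwise identical signatures:

__host__ int pick();
__device__ int pick();
__host__ __device__ int chooser() {
  return pick();  // device compilation: the __device__ pick() is CFP_SameSide
                  //   while the __host__ one is only CFP_WrongSide, so the
                  //   device overload wins; host compilation mirrors this
}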
+
+  /// May add implicit CUDAHostAttr and CUDADeviceAttr attributes to FD,
+  /// depending on FD and the current compilation settings.
+  void maybeAddHostDeviceAttrs(FunctionDecl *FD, const LookupResult &Previous);
+
+  /// May add implicit CUDAConstantAttr attribute to VD, depending on VD
+  /// and current compilation settings.
+  void MaybeAddConstantAttr(VarDecl *VD);
+
+  /// Check whether we're allowed to call Callee from the current context.
+  ///
+  /// - If the call is never allowed in a semantically-correct program
+  ///   (CFP_Never), emits an error and returns false.
+  ///
+  /// - If the call is allowed in semantically-correct programs, but only if
+  ///   it's never codegen'ed (CFP_WrongSide), creates a deferred diagnostic to
+  ///   be emitted if and when the caller is codegen'ed, and returns true.
+  ///
+  ///   Will only create deferred diagnostics for a given SourceLocation once,
+  ///   so you can safely call this multiple times without generating duplicate
+  ///   deferred errors.
+  ///
+  /// - Otherwise, returns true without emitting any diagnostics.
+  bool CheckCall(SourceLocation Loc, FunctionDecl *Callee);
+
+  void CheckLambdaCapture(CXXMethodDecl *D, const sema::Capture &Capture);
+
+  /// Set __device__ or __host__ __device__ attributes on the given lambda
+  /// operator() method.
+  ///
+  /// A CUDA lambda is by default a host device function unless it has an
+  /// explicit host or device attribute.
+  void SetLambdaAttrs(CXXMethodDecl *Method);
+
+  /// Record \p FD if it is a CUDA/HIP implicit host device function used on
+  /// device side in device compilation.
+  void RecordImplicitHostDeviceFuncUsedByDevice(const FunctionDecl *FD);
+
+  /// Finds a function in \p Matches with highest calling priority
+  /// from \p Caller context and erases all functions with lower
+  /// calling priority.
+  void EraseUnwantedMatches(
+      const FunctionDecl *Caller,
+      llvm::SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>>
+          &Matches);
+
+  /// Given an implicit special member, infer its CUDA target from the
+  /// calls it needs to make to underlying base/field special members.
+  /// \param ClassDecl the class for which the member is being created.
+  /// \param CSM the kind of special member.
+  /// \param MemberDecl the special member itself.
+  /// \param ConstRHS true if this is a copy operation with a const object on
+  ///        its RHS.
+  /// \param Diagnose true if this call should emit diagnostics.
+  /// \return true if there was an error inferring.
+  /// The result of this call is implicit CUDA target attribute(s) attached to
+  /// the member declaration.
+  bool inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
+                                           CXXSpecialMemberKind CSM,
+                                           CXXMethodDecl *MemberDecl,
+                                           bool ConstRHS, bool Diagnose);
+
+  /// \return true if \p CD can be considered empty according to CUDA
+  /// (E.2.3.1 in CUDA 7.5 Programming guide).
+  bool isEmptyConstructor(SourceLocation Loc, CXXConstructorDecl *CD);
+  bool isEmptyDestructor(SourceLocation Loc, CXXDestructorDecl *CD);
+
+  // \brief Checks that initializers of \p Var satisfy CUDA restrictions. In
+  // case of error emits appropriate diagnostic and invalidates \p Var.
+  //
+  // \details CUDA allows only empty constructors as initializers for global
+  // variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all
+  // __shared__ variables whether they are local or not (they all are implicitly
+  // static in CUDA). One exception is that CUDA allows constant initializers
+  // for __constant__ and __device__ variables.
+  void checkAllowedInitializer(VarDecl *VD);
+
+  /// Check whether NewFD is a valid overload for CUDA. Emits
+  /// diagnostics and invalidates NewFD if not.
+  void checkTargetOverload(FunctionDecl *NewFD, const LookupResult &Previous);
+  /// Copies target attributes from the template TD to the function FD.
+  void inheritTargetAttrs(FunctionDecl *FD, const FunctionTemplateDecl &TD);
+
+  /// Returns the name of the launch configuration function. This is the name
+  /// of the function that will be called to configure kernel call, with the
+  /// parameters specified via <<<>>>.
+  std::string getConfigureFuncName() const;
+
+private:
+  unsigned ForceHostDeviceDepth = 0;
+
+  friend class ASTReader;
+  friend class ASTWriter;
+};
+
+} // namespace clang
+
+namespace llvm {
+// Hash a FunctionDeclAndLoc by looking at both its FunctionDecl and its
+// SourceLocation.
+template <> struct DenseMapInfo<clang::SemaCUDA::FunctionDeclAndLoc> {
+  using FunctionDeclAndLoc = clang::SemaCUDA::FunctionDeclAndLoc;
+  using FDBaseInfo =
+      DenseMapInfo<clang::CanonicalDeclPtr<clang::FunctionDecl>>;
+
+  static FunctionDeclAndLoc getEmptyKey() {
+    return {FDBaseInfo::getEmptyKey(), clang::SourceLocation()};
+  }
+
+  static FunctionDeclAndLoc getTombstoneKey() {
+    return {FDBaseInfo::getTombstoneKey(), clang::SourceLocation()};
+  }
+
+  static unsigned getHashValue(const FunctionDeclAndLoc &FDL) {
+    return hash_combine(FDBaseInfo::getHashValue(FDL.FD),
+                        FDL.Loc.getHashValue());
+  }
+
+  static bool isEqual(const FunctionDeclAndLoc &LHS,
+                      const FunctionDeclAndLoc &RHS) {
+    return LHS.FD == RHS.FD && LHS.Loc == RHS.Loc;
+  }
+};
+} // namespace llvm
+
+#endif // LLVM_CLANG_SEMA_SEMACUDA_H
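A sketch of what this DenseMapInfo specialization buys: (function, location) pairs become usable as keys in LLVM hash containers, which is how CheckCall deduplicates its "bad call" diagnostics (the helper below is hypothetical):

void noteBadCall(clang::FunctionDecl *FD, clang::SourceLocation Loc) {
  static llvm::DenseSet<clang::SemaCUDA::FunctionDeclAndLoc> Seen;
  // insert() returns {iterator, inserted}; a false .second means this exact
  // (function, location) pair was already diagnosed and can be skipped.
  if (!Seen.insert({FD, Loc}).second)
    return;
  // ... emit the (possibly deferred) diagnostic ...
}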
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index 6656c1c58dec9..e3fde887f99cb 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -873,7 +873,7 @@ class ASTReader

   /// Our current depth in #pragma cuda force_host_device begin/end
   /// macros.
-  unsigned ForceCUDAHostDeviceDepth = 0;
+  unsigned ForceHostDeviceDepth = 0;

   /// The IDs of the declarations Sema stores directly.
   ///
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 583232f2d610d..951e2210031a1 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -26,6 +26,7 @@
 #include "clang/Sema/Lookup.h"
 #include "clang/Sema/ParsedTemplate.h"
 #include "clang/Sema/Scope.h"
+#include "clang/Sema/SemaCUDA.h"
 #include "clang/Sema/SemaDiagnostic.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallString.h"
@@ -2664,7 +2665,8 @@ Decl *Parser::ParseDeclarationAfterDeclaratorAndAttributes(
     }
   }

-  Sema::CUDATargetContextRAII X(Actions, Sema::CTCK_InitGlobalVar, ThisDecl);
+  SemaCUDA::CUDATargetContextRAII X(Actions.CUDA(),
+                                    SemaCUDA::CTCK_InitGlobalVar, ThisDecl);
   switch (TheInitKind) {
   // Parse declarator '=' initializer.
   case InitKind::Equal: {
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index d08e675604d19..473ec9afd6018 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -30,6 +30,7 @@
 #include "clang/Sema/EnterExpressionEvaluationContext.h"
 #include "clang/Sema/ParsedTemplate.h"
 #include "clang/Sema/Scope.h"
+#include "clang/Sema/SemaCUDA.h"
 #include "clang/Sema/SemaSYCL.h"
 #include "clang/Sema/TypoCorrection.h"
 #include "llvm/ADT/SmallVector.h"
@@ -2129,10 +2130,8 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) {
       }

       if (!LHS.isInvalid()) {
-        ExprResult ECResult = Actions.ActOnCUDAExecConfigExpr(getCurScope(),
-                                                              OpenLoc,
-                                                              ExecConfigExprs,
-                                                              CloseLoc);
+        ExprResult ECResult = Actions.CUDA().ActOnExecConfigExpr(
+            getCurScope(), OpenLoc, ExecConfigExprs, CloseLoc);
         if (ECResult.isInvalid())
           LHS = ExprError();
         else
diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp
index 0f692e2146a49..3979f75b6020d 100644
--- a/clang/lib/Parse/ParsePragma.cpp
+++ b/clang/lib/Parse/ParsePragma.cpp
@@ -21,6 +21,7 @@
 #include "clang/Parse/RAIIObjectsForParser.h"
 #include "clang/Sema/EnterExpressionEvaluationContext.h"
 #include "clang/Sema/Scope.h"
+#include "clang/Sema/SemaCUDA.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringSwitch.h"
 #include <optional>
@@ -3900,8 +3901,8 @@ void PragmaForceCUDAHostDeviceHandler::HandlePragma(
   }

   if (Info->isStr("begin"))
-    Actions.PushForceCUDAHostDevice();
-  else if (!Actions.PopForceCUDAHostDevice())
+    Actions.CUDA().PushForceHostDevice();
+  else if (!Actions.CUDA().PopForceHostDevice())
     PP.Diag(FirstTok.getLocation(),
             diag::err_pragma_cannot_end_force_cuda_host_device);
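For orientation, a sketch of the construct ActOnExecConfigExpr handles; OpenLoc and CloseLoc are the locations of <<< and >>>, and the configuration arguments become a call to the function named by getConfigureFuncName():

__global__ void kernel(int *p);

void host_launch(int *p) {
  kernel<<<dim3(8), dim3(128)>>>(p);  // grid and block dims; shared-memory
                                      //   size and stream default to 0
}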
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index a2ea66f339c8e..8de202f4f7a0c 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -41,6 +41,7 @@
 #include "clang/Sema/RISCVIntrinsicManager.h"
 #include "clang/Sema/Scope.h"
 #include "clang/Sema/ScopeInfo.h"
+#include "clang/Sema/SemaCUDA.h"
 #include "clang/Sema/SemaConsumer.h"
 #include "clang/Sema/SemaHLSL.h"
 #include "clang/Sema/SemaInternal.h"
@@ -199,6 +200,7 @@ Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer,
       LateTemplateParser(nullptr), LateTemplateParserCleanup(nullptr),
       OpaqueParser(nullptr), CurContext(nullptr), ExternalSource(nullptr),
       CurScope(nullptr), Ident_super(nullptr),
+      CUDAPtr(std::make_unique<SemaCUDA>(*this)),
       HLSLPtr(std::make_unique<SemaHLSL>(*this)),
       OpenACCPtr(std::make_unique<SemaOpenACC>(*this)),
       SYCLPtr(std::make_unique<SemaSYCL>(*this)),
@@ -1635,15 +1637,15 @@ bool Sema::hasUncompilableErrorOccurred() const {
 // Print notes showing how we can reach FD starting from an a priori
 // known-callable function.
 static void emitCallStackNotes(Sema &S, const FunctionDecl *FD) {
-  auto FnIt = S.DeviceKnownEmittedFns.find(FD);
-  while (FnIt != S.DeviceKnownEmittedFns.end()) {
+  auto FnIt = S.CUDA().DeviceKnownEmittedFns.find(FD);
+  while (FnIt != S.CUDA().DeviceKnownEmittedFns.end()) {
     // Respect error limit.
     if (S.Diags.hasFatalErrorOccurred())
       return;
     DiagnosticBuilder Builder(
         S.Diags.Report(FnIt->second.Loc, diag::note_called_by));
     Builder << FnIt->second.FD;
-    FnIt = S.DeviceKnownEmittedFns.find(FnIt->second.FD);
+    FnIt = S.CUDA().DeviceKnownEmittedFns.find(FnIt->second.FD);
   }
 }
@@ -1747,7 +1749,7 @@ class DeferredDiagnosticsEmitter
         (ShouldEmitRootNode || InOMPDeviceContext))
       S.finalizeOpenMPDelayedAnalysis(Caller, FD, Loc);
     if (Caller)
-      S.DeviceKnownEmittedFns[FD] = {Caller, Loc};
+      S.CUDA().DeviceKnownEmittedFns[FD] = {Caller, Loc};
     // Always emit deferred diagnostics for the direct users. This does not
     // lead to explosion of diagnostics since each user is visited at most
     // twice.
@@ -1836,8 +1838,8 @@ void Sema::emitDeferredDiags() {
 // which other not-known-emitted functions.
 //
 // When we see something which is illegal if the current function is emitted
-// (usually by way of CUDADiagIfDeviceCode, CUDADiagIfHostCode, or
-// CheckCUDACall), we first check if the current function is known-emitted. If
+// (usually by way of DiagIfDeviceCode, DiagIfHostCode, or
+// CheckCall), we first check if the current function is known-emitted. If
 // so, we immediately output the diagnostic.
 //
 // Otherwise, we "defer" the diagnostic. It sits in Sema::DeviceDeferredDiags
@@ -1900,8 +1902,8 @@ Sema::targetDiag(SourceLocation Loc, unsigned DiagID, const FunctionDecl *FD) {
                ? diagIfOpenMPDeviceCode(Loc, DiagID, FD)
                : diagIfOpenMPHostCode(Loc, DiagID, FD);
   if (getLangOpts().CUDA)
-    return getLangOpts().CUDAIsDevice ? CUDADiagIfDeviceCode(Loc, DiagID)
-                                      : CUDADiagIfHostCode(Loc, DiagID);
+    return getLangOpts().CUDAIsDevice ? CUDA().DiagIfDeviceCode(Loc, DiagID)
+                                      : CUDA().DiagIfHostCode(Loc, DiagID);

   if (getLangOpts().SYCLIsDevice)
     return SYCL().DiagIfDeviceCode(Loc, DiagID);
diff --git a/clang/lib/Sema/SemaBase.cpp b/clang/lib/Sema/SemaBase.cpp
index 95c0cfbe283b0..0442fb2929e3c 100644
--- a/clang/lib/Sema/SemaBase.cpp
+++ b/clang/lib/Sema/SemaBase.cpp
@@ -1,5 +1,6 @@
 #include "clang/Sema/SemaBase.h"
 #include "clang/Sema/Sema.h"
+#include "clang/Sema/SemaCUDA.h"

 namespace clang {

@@ -70,8 +71,8 @@ Sema::SemaDiagnosticBuilder SemaBase::Diag(SourceLocation Loc, unsigned DiagID,
   }
   SemaDiagnosticBuilder DB = getLangOpts().CUDAIsDevice
-                                 ? SemaRef.CUDADiagIfDeviceCode(Loc, DiagID)
-                                 : SemaRef.CUDADiagIfHostCode(Loc, DiagID);
+                                 ? SemaRef.CUDA().DiagIfDeviceCode(Loc, DiagID)
+                                 : SemaRef.CUDA().DiagIfHostCode(Loc, DiagID);
   SetIsLastErrorImmediate(DB.isImmediate());
   return DB;
 }
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 9d6d709e262ad..80ea43dc5316e 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -10,6 +10,7 @@
 ///
 //===----------------------------------------------------------------------===//

+#include "clang/Sema/SemaCUDA.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/ExprCXX.h"
@@ -27,6 +28,8 @@
 #include <optional>
 using namespace clang;

+SemaCUDA::SemaCUDA(Sema &S) : SemaBase(S) {}
+
 template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) {
   if (!D)
     return false;
   if (auto *A = D->getAttr<AttrT>())
     return !A->isImplicit();
   return false;
 }

-void Sema::PushForceCUDAHostDevice() {
+void SemaCUDA::PushForceHostDevice() {
   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
-  ForceCUDAHostDeviceDepth++;
+  ForceHostDeviceDepth++;
 }

-bool Sema::PopForceCUDAHostDevice() {
+bool SemaCUDA::PopForceHostDevice() {
   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
-  if (ForceCUDAHostDeviceDepth == 0)
+  if (ForceHostDeviceDepth == 0)
     return false;
-  ForceCUDAHostDeviceDepth--;
+  ForceHostDeviceDepth--;
   return true;
 }

-ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
+ExprResult SemaCUDA::ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                          MultiExprArg ExecConfig,
                                          SourceLocation GGGLoc) {
-  FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
+  FunctionDecl *ConfigDecl = getASTContext().getcudaConfigureCallDecl();
   if (!ConfigDecl)
     return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
-                     << getCudaConfigureFuncName());
+                     << getConfigureFuncName());
   QualType ConfigQTy = ConfigDecl->getType();

-  DeclRefExpr *ConfigDR = new (Context)
-      DeclRefExpr(Context, ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
-  MarkFunctionReferenced(LLLLoc, ConfigDecl);
+  DeclRefExpr *ConfigDR = new (getASTContext()) DeclRefExpr(
+      getASTContext(), ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
+  SemaRef.MarkFunctionReferenced(LLLLoc, ConfigDecl);

-  return BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
-                       /*IsExecConfig=*/true);
+  return SemaRef.BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
+                               /*IsExecConfig=*/true);
 }

-CUDAFunctionTarget Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
+CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) {
   bool HasHostAttr = false;
   bool HasDeviceAttr = false;
   bool HasGlobalAttr = false;
@@ -112,12 +115,11 @@ static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
   });
 }

-Sema::CUDATargetContextRAII::CUDATargetContextRAII(Sema &S_,
-                                                   CUDATargetContextKind K,
-                                                   Decl *D)
+SemaCUDA::CUDATargetContextRAII::CUDATargetContextRAII(
+    SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K, Decl *D)
     : S(S_) {
   SavedCtx = S.CurCUDATargetCtx;
-  assert(K == CTCK_InitGlobalVar);
+  assert(K == SemaCUDA::CTCK_InitGlobalVar);
   auto *VD = dyn_cast_or_null<VarDecl>(D);
   if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
     auto Target = CUDAFunctionTarget::Host;
@@ -130,8 +132,8 @@ Sema::CUDATargetContextRAII::CUDATargetContextRAII(Sema &S_,
   }
 }

-/// IdentifyCUDATarget - Determine the CUDA compilation target for this function
-CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
+/// IdentifyTarget - Determine the CUDA compilation target for this function
+CUDAFunctionTarget SemaCUDA::IdentifyTarget(const FunctionDecl *D,
                                             bool IgnoreImplicitHDAttr) {
   // Code that lives outside a function gets the target from CurCUDATargetCtx.
   if (D == nullptr)
@@ -160,7 +162,7 @@ CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
 }

 /// IdentifyTarget - Determine the CUDA compilation target for this variable.
-Sema::CUDAVariableTarget Sema::IdentifyCUDATarget(const VarDecl *Var) {
+SemaCUDA::CUDAVariableTarget SemaCUDA::IdentifyTarget(const VarDecl *Var) {
   if (Var->hasAttr<HIPManagedAttr>())
     return CVT_Unified;
   // Only constexpr and const variabless with implicit constant attribute
@@ -180,7 +182,7 @@ Sema::CUDAVariableTarget Sema::IdentifyCUDATarget(const VarDecl *Var) {
   //   - on both sides in host device functions
   //   - on device side in device or global functions
   if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
-    switch (IdentifyCUDATarget(FD)) {
+    switch (IdentifyTarget(FD)) {
     case CUDAFunctionTarget::HostDevice:
       return CVT_Both;
    case CUDAFunctionTarget::Device:
@@ -221,21 +223,21 @@ Sema::CUDAVariableTarget Sema::IdentifyCUDATarget(const VarDecl *Var) {
 // |  hd  |  h   | SS  | WS  |      (d) |
 // |  hd  |  hd  | HD  | HD  |      (b) |

-Sema::CUDAFunctionPreference
-Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
+SemaCUDA::CUDAFunctionPreference
+SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
                              const FunctionDecl *Callee) {
   assert(Callee && "Callee must be valid.");

   // Treat ctor/dtor as host device function in device var initializer to allow
   // trivial ctor/dtor without device attr to be used. Non-trivial ctor/dtor
-  // will be diagnosed by checkAllowedCUDAInitializer.
+  // will be diagnosed by checkAllowedInitializer.
   if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar &&
       CurCUDATargetCtx.Target == CUDAFunctionTarget::Device &&
       (isa<CXXConstructorDecl>(Callee) || isa<CXXDestructorDecl>(Callee)))
     return CFP_HostDevice;

-  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
-  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
+  CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);
+  CUDAFunctionTarget CalleeTarget = IdentifyTarget(Callee);

   // If one of the targets is invalid, the check always fails, no matter what
   // the other target is.
@@ -309,13 +311,13 @@ template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) {
   return D->isImplicit();
 }

-bool Sema::isCUDAImplicitHostDeviceFunction(const FunctionDecl *D) {
+bool SemaCUDA::isImplicitHostDeviceFunction(const FunctionDecl *D) {
   bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D);
   bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D);
   return IsImplicitDevAttr && IsImplicitHostAttr;
 }

-void Sema::EraseUnwantedCUDAMatches(
+void SemaCUDA::EraseUnwantedMatches(
     const FunctionDecl *Caller,
     SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
   if (Matches.size() <= 1)
@@ -325,7 +327,7 @@ void Sema::EraseUnwantedCUDAMatches(

   // Gets the CUDA function preference for a call from Caller to Match.
   auto GetCFP = [&](const Pair &Match) {
-    return IdentifyCUDAPreference(Caller, Match.second);
+    return IdentifyPreference(Caller, Match.second);
   };

   // Find the best call preference among the functions in Matches.
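A sketch of the ctor/dtor carve-out above: inside a device variable's initializer (Caller is null, kind CTCK_InitGlobalVar, target Device), constructors and destructors are treated as CFP_HostDevice so that trivial ones without a __device__ attribute still type-check; non-trivial ones are left for checkAllowedInitializer to reject:

struct S {
  S() = default;  // trivial: usable in a device variable's initializer
  int x;
};
__device__ S GlobalOnDevice;  // accepted even though S::S() is unattributed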
@@ -367,7 +369,7 @@ resolveCalleeCUDATargetConflict(CUDAFunctionTarget Target1,
   return false;
 }

-bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
+bool SemaCUDA::inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                    CXXSpecialMemberKind CSM,
                                                    CXXMethodDecl *MemberDecl,
                                                    bool ConstRHS,
@@ -388,7 +390,7 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,

   // We're going to invoke special member lookup; mark that these special
   // members are called from this one, and not from its caller.
-  ContextRAII MethodContext(*this, MemberDecl);
+  Sema::ContextRAII MethodContext(SemaRef, MemberDecl);

   // Look for special members in base classes that should be invoked from here.
   // Infer the target of this member base on the ones it should call.
@@ -412,17 +414,17 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
     CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
     Sema::SpecialMemberOverloadResult SMOR =
-        LookupSpecialMember(BaseClassDecl, CSM,
-                            /* ConstArg */ ConstRHS,
-                            /* VolatileArg */ false,
-                            /* RValueThis */ false,
-                            /* ConstThis */ false,
-                            /* VolatileThis */ false);
+        SemaRef.LookupSpecialMember(BaseClassDecl, CSM,
+                                    /* ConstArg */ ConstRHS,
+                                    /* VolatileArg */ false,
+                                    /* RValueThis */ false,
+                                    /* ConstThis */ false,
+                                    /* VolatileThis */ false);

     if (!SMOR.getMethod())
       continue;

-    CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod());
+    CUDAFunctionTarget BaseMethodTarget = IdentifyTarget(SMOR.getMethod());
     if (!InferredTarget) {
       InferredTarget = BaseMethodTarget;
     } else {
@@ -435,7 +437,8 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
             << (unsigned)CSM << llvm::to_underlying(*InferredTarget)
             << llvm::to_underlying(BaseMethodTarget);
       }
-      MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
+      MemberDecl->addAttr(
+          CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
       return true;
     }
   }
@@ -448,25 +451,24 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
     }

     const RecordType *FieldType =
-        Context.getBaseElementType(F->getType())->getAs<RecordType>();
+        getASTContext().getBaseElementType(F->getType())->getAs<RecordType>();
     if (!FieldType) {
       continue;
     }

     CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
     Sema::SpecialMemberOverloadResult SMOR =
-        LookupSpecialMember(FieldRecDecl, CSM,
-                            /* ConstArg */ ConstRHS && !F->isMutable(),
-                            /* VolatileArg */ false,
-                            /* RValueThis */ false,
-                            /* ConstThis */ false,
-                            /* VolatileThis */ false);
+        SemaRef.LookupSpecialMember(FieldRecDecl, CSM,
+                                    /* ConstArg */ ConstRHS && !F->isMutable(),
+                                    /* VolatileArg */ false,
+                                    /* RValueThis */ false,
+                                    /* ConstThis */ false,
+                                    /* VolatileThis */ false);

     if (!SMOR.getMethod())
       continue;

-    CUDAFunctionTarget FieldMethodTarget =
-        IdentifyCUDATarget(SMOR.getMethod());
+    CUDAFunctionTarget FieldMethodTarget = IdentifyTarget(SMOR.getMethod());
     if (!InferredTarget) {
       InferredTarget = FieldMethodTarget;
     } else {
@@ -479,7 +481,8 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
             << (unsigned)CSM << llvm::to_underlying(*InferredTarget)
             << llvm::to_underlying(FieldMethodTarget);
       }
-      MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
+      MemberDecl->addAttr(
+          CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
       return true;
     }
   }
@@ -499,16 +502,16 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,

   // We either setting attributes first time, or the inferred ones must match
   // previously set ones.
   if (NeedsD && !HasD)
-    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
   if (NeedsH && !HasH)
-    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
+    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));

   return false;
 }

-bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
+bool SemaCUDA::isEmptyConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
   if (!CD->isDefined() && CD->isTemplateInstantiation())
-    InstantiateFunctionDefinition(Loc, CD->getFirstDecl());
+    SemaRef.InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

   // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
   // empty at a point in the translation unit, if it is either a
@@ -536,7 +539,7 @@ bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
   if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
         if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(CI->getInit()))
-          return isEmptyCudaConstructor(Loc, CE->getConstructor());
+          return isEmptyConstructor(Loc, CE->getConstructor());
         return false;
       }))
     return false;
@@ -544,13 +547,13 @@ bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
   return true;
 }

-bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
+bool SemaCUDA::isEmptyDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
   // No destructor -> no problem.
   if (!DD)
     return true;

   if (!DD->isDefined() && DD->isTemplateInstantiation())
-    InstantiateFunctionDefinition(Loc, DD->getFirstDecl());
+    SemaRef.InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

   // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
   // empty at a point in the translation unit, if it is either a
@@ -579,7 +582,7 @@ bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
   // destructors for all base classes...
   if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
         if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
-          return isEmptyCudaDestructor(Loc, RD->getDestructor());
+          return isEmptyDestructor(Loc, RD->getDestructor());
         return true;
       }))
     return false;
@@ -589,7 +592,7 @@ bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
         if (CXXRecordDecl *RD = Field->getType()
                                     ->getBaseElementTypeUnsafe()
                                     ->getAsCXXRecordDecl())
-          return isEmptyCudaDestructor(Loc, RD->getDestructor());
+          return isEmptyDestructor(Loc, RD->getDestructor());
         return true;
       }))
     return false;
@@ -620,7 +623,7 @@ bool IsDependentVar(VarDecl *VD) {
 // __shared__ variables whether they are local or not (they all are implicitly
 // static in CUDA). One exception is that CUDA allows constant initializers
 // for __constant__ and __device__ variables.
-bool HasAllowedCUDADeviceStaticInitializer(Sema &S, VarDecl *VD,
+bool HasAllowedCUDADeviceStaticInitializer(SemaCUDA &S, VarDecl *VD,
                                            CUDAInitializerCheckKind CheckKind) {
   assert(!VD->isInvalidDecl() && VD->hasGlobalStorage());
   assert(!IsDependentVar(VD) && "do not check dependent var");
@@ -629,30 +632,30 @@ bool HasAllowedCUDADeviceStaticInitializer(Sema &S, VarDecl *VD,
     if (!Init)
       return true;
     if (const auto *CE = dyn_cast<CXXConstructExpr>(Init)) {
-      return S.isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
+      return S.isEmptyConstructor(VD->getLocation(), CE->getConstructor());
     }
     return false;
   };
   auto IsConstantInit = [&](const Expr *Init) {
     assert(Init);
-    ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.Context,
+    ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.getASTContext(),
                                                     /*NoWronSidedVars=*/true);
-    return Init->isConstantInitializer(S.Context,
+    return Init->isConstantInitializer(S.getASTContext(),
                                        VD->getType()->isReferenceType());
   };
   auto HasEmptyDtor = [&](VarDecl *VD) {
     if (const auto *RD = VD->getType()->getAsCXXRecordDecl())
-      return S.isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());
+      return S.isEmptyDestructor(VD->getLocation(), RD->getDestructor());
     return true;
   };
   if (CheckKind == CICK_Shared)
     return IsEmptyInit(Init) && HasEmptyDtor(VD);
-  return S.LangOpts.GPUAllowDeviceInit ||
+  return S.getLangOpts().GPUAllowDeviceInit ||
          ((IsEmptyInit(Init) || IsConstantInit(Init)) && HasEmptyDtor(VD));
 }
 } // namespace

-void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
+void SemaCUDA::checkAllowedInitializer(VarDecl *VD) {
   // Return early if VD is inside a non-instantiated template function since
   // the implicit constructor is not defined yet.
   if (const FunctionDecl *FD =
@@ -688,7 +691,7 @@ void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
       InitFn = CE->getDirectCallee();
   }
   if (InitFn) {
-    CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn);
+    CUDAFunctionTarget InitFnTarget = IdentifyTarget(InitFn);
     if (InitFnTarget != CUDAFunctionTarget::Host &&
         InitFnTarget != CUDAFunctionTarget::HostDevice) {
       Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
@@ -700,22 +703,22 @@ void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
     }
   }
 }

-void Sema::CUDARecordImplicitHostDeviceFuncUsedByDevice(
+void SemaCUDA::RecordImplicitHostDeviceFuncUsedByDevice(
     const FunctionDecl *Callee) {
-  FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
+  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
   if (!Caller)
     return;

-  if (!isCUDAImplicitHostDeviceFunction(Callee))
+  if (!isImplicitHostDeviceFunction(Callee))
     return;

-  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
+  CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);

   // Record whether an implicit host device function is used on device side.
   if (CallerTarget != CUDAFunctionTarget::Device &&
       CallerTarget != CUDAFunctionTarget::Global &&
       (CallerTarget != CUDAFunctionTarget::HostDevice ||
-       (isCUDAImplicitHostDeviceFunction(Caller) &&
+       (isImplicitHostDeviceFunction(Caller) &&
        !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Caller))))
     return;
@@ -731,18 +734,18 @@ void Sema::CUDARecordImplicitHostDeviceFuncUsedByDevice(
 // system header, in which case we leave the constexpr function unattributed.
 //
 // In addition, all function decls are treated as __host__ __device__ when
-// ForceCUDAHostDeviceDepth > 0 (corresponding to code within a
+// ForceHostDeviceDepth > 0 (corresponding to code within a
 //   #pragma clang force_cuda_host_device_begin/end
 // pair).
-void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
+void SemaCUDA::maybeAddHostDeviceAttrs(FunctionDecl *NewD,
                                        const LookupResult &Previous) {
   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");

-  if (ForceCUDAHostDeviceDepth > 0) {
+  if (ForceHostDeviceDepth > 0) {
     if (!NewD->hasAttr<CUDAHostAttr>())
-      NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
+      NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
     if (!NewD->hasAttr<CUDADeviceAttr>())
-      NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+      NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
     return;
   }

@@ -753,8 +756,8 @@ void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
      !NewD->hasAttr<CUDAGlobalAttr>() &&
       (NewD->getDescribedFunctionTemplate() ||
        NewD->isFunctionTemplateSpecialization())) {
-    NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
-    NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+    NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
+    NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
     return;
   }

@@ -771,8 +774,9 @@ void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
     FunctionDecl *OldD = D->getAsFunction();
     return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
            !OldD->hasAttr<CUDAHostAttr>() &&
-           !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
-                       /* ConsiderCudaAttrs = */ false);
+           !SemaRef.IsOverload(NewD, OldD,
+                               /* UseMemberUsingDeclRules = */ false,
+                               /* ConsiderCudaAttrs = */ false);
   };
   auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
   if (It != Previous.end()) {
@@ -781,7 +785,7 @@ void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
     // in a system header, in which case we simply return without making NewD
     // host+device.
     NamedDecl *Match = *It;
-    if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
+    if (!SemaRef.getSourceManager().isInSystemHeader(Match->getLocation())) {
       Diag(NewD->getLocation(),
            diag::err_cuda_unattributed_constexpr_cannot_overload_device)
           << NewD;
@@ -791,14 +795,14 @@ void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
     return;
   }

-  NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
-  NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+  NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
+  NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
 }
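A sketch of the constexpr path above: in C++14 CUDA an unattributed constexpr function picks up implicit host and device attributes, unless a same-signature __device__ overload already exists in a system header:

constexpr int square(int x) { return x * x; }     // becomes implicit HD
__global__ void k(int *out) { *out = square(3); } // so this call is legal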

 // TODO: `__constant__` memory may be a limited resource for certain targets.
 // A safeguard may be needed at the end of compilation pipeline if
 // `__constant__` memory usage goes beyond limit.
-void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) {
+void SemaCUDA::MaybeAddConstantAttr(VarDecl *VD) {
   // Do not promote dependent variables since the cotr/dtor/initializer are
   // not determined. Do it after instantiation.
   if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() &&
@@ -812,14 +816,15 @@ void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) {
   }
 }

-Sema::SemaDiagnosticBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
-                                                       unsigned DiagID) {
+SemaBase::SemaDiagnosticBuilder SemaCUDA::DiagIfDeviceCode(SourceLocation Loc,
+                                                           unsigned DiagID) {
   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
-  FunctionDecl *CurFunContext = getCurFunctionDecl(/*AllowLambda=*/true);
+  FunctionDecl *CurFunContext =
+      SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
   SemaDiagnosticBuilder::Kind DiagKind = [&] {
     if (!CurFunContext)
       return SemaDiagnosticBuilder::K_Nop;
-    switch (CurrentCUDATarget()) {
+    switch (CurrentTarget()) {
     case CUDAFunctionTarget::Global:
     case CUDAFunctionTarget::Device:
       return SemaDiagnosticBuilder::K_Immediate;
@@ -829,27 +834,29 @@ Sema::SemaDiagnosticBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
       // mode until the function is known-emitted.
       if (!getLangOpts().CUDAIsDevice)
         return SemaDiagnosticBuilder::K_Nop;
-      if (IsLastErrorImmediate && Diags.getDiagnosticIDs()->isBuiltinNote(DiagID))
+      if (SemaRef.IsLastErrorImmediate &&
+          getDiagnostics().getDiagnosticIDs()->isBuiltinNote(DiagID))
         return SemaDiagnosticBuilder::K_Immediate;
-      return (getEmissionStatus(CurFunContext) ==
-              FunctionEmissionStatus::Emitted)
+      return (SemaRef.getEmissionStatus(CurFunContext) ==
+              Sema::FunctionEmissionStatus::Emitted)
                  ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                  : SemaDiagnosticBuilder::K_Deferred;
     default:
       return SemaDiagnosticBuilder::K_Nop;
     }
   }();
-  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, *this);
+  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef);
 }

-Sema::SemaDiagnosticBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
+Sema::SemaDiagnosticBuilder SemaCUDA::DiagIfHostCode(SourceLocation Loc,
                                                      unsigned DiagID) {
   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
-  FunctionDecl *CurFunContext = getCurFunctionDecl(/*AllowLambda=*/true);
+  FunctionDecl *CurFunContext =
+      SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
   SemaDiagnosticBuilder::Kind DiagKind = [&] {
     if (!CurFunContext)
       return SemaDiagnosticBuilder::K_Nop;
-    switch (CurrentCUDATarget()) {
+    switch (CurrentTarget()) {
     case CUDAFunctionTarget::Host:
       return SemaDiagnosticBuilder::K_Immediate;
     case CUDAFunctionTarget::HostDevice:
@@ -858,40 +865,41 @@ Sema::SemaDiagnosticBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
       // mode until the function is known-emitted.
       if (getLangOpts().CUDAIsDevice)
         return SemaDiagnosticBuilder::K_Nop;
-      if (IsLastErrorImmediate && Diags.getDiagnosticIDs()->isBuiltinNote(DiagID))
+      if (SemaRef.IsLastErrorImmediate &&
+          getDiagnostics().getDiagnosticIDs()->isBuiltinNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
-      return (getEmissionStatus(CurFunContext) ==
-              FunctionEmissionStatus::Emitted)
+      return (SemaRef.getEmissionStatus(CurFunContext) ==
+              Sema::FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
     default:
       return SemaDiagnosticBuilder::K_Nop;
     }
   }();
-  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, *this);
+  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef);
 }

-bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
+bool SemaCUDA::CheckCall(SourceLocation Loc, FunctionDecl *Callee) {
   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
   assert(Callee && "Callee may not be null.");

-  const auto &ExprEvalCtx = currentEvaluationContext();
+  const auto &ExprEvalCtx = SemaRef.currentEvaluationContext();
   if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
     return true;

   // FIXME: Is bailing out early correct here? Should we instead assume that
   // the caller is a global initializer?
-  FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
+  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
   if (!Caller)
     return true;

   // If the caller is known-emitted, mark the callee as known-emitted.
   // Otherwise, mark the call in our call graph so we can traverse it later.
-  bool CallerKnownEmitted =
-      getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted;
+  bool CallerKnownEmitted = SemaRef.getEmissionStatus(Caller) ==
+                            Sema::FunctionEmissionStatus::Emitted;
   SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee,
                                           CallerKnownEmitted] {
-    switch (IdentifyCUDAPreference(Caller, Callee)) {
+    switch (IdentifyPreference(Caller, Callee)) {
     case CFP_Never:
     case CFP_WrongSide:
       assert(Caller && "Never/wrongSide calls require a non-null caller");
@@ -908,7 +916,7 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {

   if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
     // For -fgpu-rdc, keep track of external kernels used by host functions.
-    if (LangOpts.CUDAIsDevice && LangOpts.GPURelocatableDeviceCode &&
+    if (getLangOpts().CUDAIsDevice && getLangOpts().GPURelocatableDeviceCode &&
         Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined() &&
         (!Caller || (!Caller->getDescribedFunctionTemplate() &&
                      getASTContext().GetGVALinkageForFunction(Caller) ==
@@ -924,12 +932,13 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
   if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
     return true;

-  SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
-      << llvm::to_underlying(IdentifyCUDATarget(Callee)) << /*function*/ 0
-      << Callee << llvm::to_underlying(IdentifyCUDATarget(Caller));
+  SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller,
+                        SemaRef)
+      << llvm::to_underlying(IdentifyTarget(Callee)) << /*function*/ 0 << Callee
+      << llvm::to_underlying(IdentifyTarget(Caller));
   if (!Callee->getBuiltinID())
     SemaDiagnosticBuilder(DiagKind, Callee->getLocation(),
-                          diag::note_previous_decl, Caller, *this)
+                          diag::note_previous_decl, Caller, SemaRef)
         << Callee;
   return DiagKind != SemaDiagnosticBuilder::K_Immediate &&
          DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack;
 }

@@ -940,7 +949,7 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
 // defined and uses the capture by reference when the lambda is called. When
 // the capture and use happen on different sides, the capture is invalid and
 // should be diagnosed.
@@ -940,7 +949,7 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
 // defined and uses the capture by reference when the lambda is called. When
 // the capture and use happen on different sides, the capture is invalid and
 // should be diagnosed.
-void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee,
+void SemaCUDA::CheckLambdaCapture(CXXMethodDecl *Callee,
                                   const sema::Capture &Capture) {
   // In host compilation we only need to check lambda functions emitted on host
   // side. In such lambda functions, a reference capture is invalid only
@@ -950,12 +959,12 @@ void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee,
   // kernel cannot pass a lambda back to a host function since we cannot
   // define a kernel argument type which can hold the lambda before the lambda
   // itself is defined.
-  if (!LangOpts.CUDAIsDevice)
+  if (!getLangOpts().CUDAIsDevice)
     return;
 
   // File-scope lambda can only do init captures for global variables, which
   // results in passing by value for these global variables.
-  FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
+  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
   if (!Caller)
     return;
 
@@ -972,7 +981,7 @@ void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee,
   auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
   if (Capture.isVariableCapture() && !getLangOpts().HIPStdPar) {
     SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
-                          diag::err_capture_bad_target, Callee, *this)
+                          diag::err_capture_bad_target, Callee, SemaRef)
         << Capture.getVariable();
   } else if (Capture.isThisCapture()) {
     // Capture of this pointer is allowed since this pointer may be pointing to
@@ -981,28 +990,28 @@ void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee,
     // accessible on device side.
     SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                           diag::warn_maybe_capture_bad_target_this_ptr, Callee,
-                          *this);
+                          SemaRef);
   }
 }
 
-void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
+void SemaCUDA::SetLambdaAttrs(CXXMethodDecl *Method) {
   assert(getLangOpts().CUDA &&
          "Should only be called during CUDA compilation");
   if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
     return;
-  Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
-  Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
+  Method->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
+  Method->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
 }
 
-void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
+void SemaCUDA::checkTargetOverload(FunctionDecl *NewFD,
                                    const LookupResult &Previous) {
   assert(getLangOpts().CUDA &&
          "Should only be called during CUDA compilation");
-  CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
+  CUDAFunctionTarget NewTarget = IdentifyTarget(NewFD);
   for (NamedDecl *OldND : Previous) {
     FunctionDecl *OldFD = OldND->getAsFunction();
     if (!OldFD)
       continue;
 
-    CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD);
+    CUDAFunctionTarget OldTarget = IdentifyTarget(OldFD);
     // Don't allow HD and global functions to overload other functions with the
     // same signature.  We allow overloading based on CUDA attributes so that
     // functions can have different implementations on the host and device, but
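A sketch of what the two moved lambda hooks do, in user code. The explicit __device__ lambda annotation below is a CUDA extension, and the names are hypothetical (illustrative only, not part of the patch):

    void make_lambdas() {
      int x = 0;
      // SetLambdaAttrs: an unattributed CUDA lambda is implicitly
      // __host__ __device__, so it is callable from either side.
      auto add = [](int a, int b) { return a + b; };

      // CheckLambdaCapture: in device compilation, a by-reference capture
      // used on the device would alias host memory and is diagnosed
      // (err_capture_bad_target); by-value capture is fine.
      auto by_ref = [&x] __device__ () { return x; };  // capture diagnosed
      auto by_val = [=] __device__ () { return x; };   // OK
    }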
@@ -1010,17 +1019,17 @@ void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
     // should have the same implementation on both sides.
     if (NewTarget != OldTarget &&
         ((NewTarget == CUDAFunctionTarget::HostDevice &&
-          !(LangOpts.OffloadImplicitHostDeviceTemplates &&
-            isCUDAImplicitHostDeviceFunction(NewFD) &&
+          !(getLangOpts().OffloadImplicitHostDeviceTemplates &&
+            isImplicitHostDeviceFunction(NewFD) &&
            OldTarget == CUDAFunctionTarget::Device)) ||
         (OldTarget == CUDAFunctionTarget::HostDevice &&
-          !(LangOpts.OffloadImplicitHostDeviceTemplates &&
-            isCUDAImplicitHostDeviceFunction(OldFD) &&
+          !(getLangOpts().OffloadImplicitHostDeviceTemplates &&
+            isImplicitHostDeviceFunction(OldFD) &&
            NewTarget == CUDAFunctionTarget::Device)) ||
         (NewTarget == CUDAFunctionTarget::Global) ||
         (OldTarget == CUDAFunctionTarget::Global)) &&
-        !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
-                    /* ConsiderCudaAttrs = */ false)) {
+        !SemaRef.IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
+                            /* ConsiderCudaAttrs = */ false)) {
      Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
          << llvm::to_underlying(NewTarget) << NewFD->getDeclName()
          << llvm::to_underlying(OldTarget) << OldFD;
@@ -1041,21 +1050,21 @@ static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
   }
 }
 
-void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
+void SemaCUDA::inheritTargetAttrs(FunctionDecl *FD,
                                   const FunctionTemplateDecl &TD) {
   const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
-  copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
-  copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
-  copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
+  copyAttrIfPresent<CUDAGlobalAttr>(SemaRef, FD, TemplateFD);
+  copyAttrIfPresent<CUDAHostAttr>(SemaRef, FD, TemplateFD);
+  copyAttrIfPresent<CUDADeviceAttr>(SemaRef, FD, TemplateFD);
 }
 
-std::string Sema::getCudaConfigureFuncName() const {
+std::string SemaCUDA::getConfigureFuncName() const {
   if (getLangOpts().HIP)
     return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
                                             : "hipConfigureCall";
 
   // New CUDA kernel launch sequence.
-  if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),
+  if (CudaFeatureEnabled(getASTContext().getTargetInfo().getSDKVersion(),
                          CudaFeature::CUDA_USES_NEW_LAUNCH))
     return "__cudaPushCallConfiguration";
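The rule checkTargetOverload enforces, sketched in user code (illustrative only):

    // Same signature but different targets: accepted, so host and device
    // can carry different implementations.
    __host__ int f() { return 0; }
    __device__ int f() { return 1; }

    // A __host__ __device__ (or __global__) function, however, may not
    // overload a same-signature function of another target:
    __host__ int g() { return 0; }
    // __host__ __device__ int g() { return 2; }  // err_cuda_ovl_target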
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 720e56692359b..17032d1370521 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -45,6 +45,7 @@
 #include "clang/Sema/ParsedTemplate.h"
 #include "clang/Sema/Scope.h"
 #include "clang/Sema/ScopeInfo.h"
+#include "clang/Sema/SemaCUDA.h"
 #include "clang/Sema/SemaHLSL.h"
 #include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/Template.h"
@@ -10595,12 +10596,12 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
 
       // We do not add HD attributes to specializations here because
       // they may have different constexpr-ness compared to their
-      // templates and, after maybeAddCUDAHostDeviceAttrs() is applied,
+      // templates and, after maybeAddHostDeviceAttrs() is applied,
       // may end up with different effective targets. Instead, a
       // specialization inherits its target attributes from its template
       // in the CheckFunctionTemplateSpecialization() call below.
       if (getLangOpts().CUDA && !isFunctionTemplateSpecialization)
-        maybeAddCUDAHostDeviceAttrs(NewFD, Previous);
+        CUDA().maybeAddHostDeviceAttrs(NewFD, Previous);
 
       // Handle explict specializations of function templates
       // and friend function declarations with an explicit
@@ -10898,12 +10899,12 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
 
   if (getLangOpts().CUDA) {
     IdentifierInfo *II = NewFD->getIdentifier();
-    if (II && II->isStr(getCudaConfigureFuncName()) &&
+    if (II && II->isStr(CUDA().getConfigureFuncName()) &&
         !NewFD->isInvalidDecl() &&
         NewFD->getDeclContext()->getRedeclContext()->isTranslationUnit()) {
       if (!R->castAs<FunctionType>()->getReturnType()->isScalarType())
         Diag(NewFD->getLocation(), diag::err_config_scalar_return)
-            << getCudaConfigureFuncName();
+            << CUDA().getConfigureFuncName();
       Context.setcudaConfigureCallDecl(NewFD);
     }
 
@@ -12398,7 +12399,7 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
   }
 
   if (!Redeclaration && LangOpts.CUDA)
-    checkCUDATargetOverload(NewFD, Previous);
+    CUDA().checkTargetOverload(NewFD, Previous);
   }
 
   // Check if the function definition uses any AArch64 SME features without
@@ -14415,7 +14416,7 @@ StmtResult Sema::ActOnCXXForRangeIdentifier(Scope *S, SourceLocation IdentLoc,
 void Sema::CheckCompleteVariableDeclaration(VarDecl *var) {
   if (var->isInvalidDecl())
     return;
-  MaybeAddCUDAConstantAttr(var);
+  CUDA().MaybeAddConstantAttr(var);
 
   if (getLangOpts().OpenCL) {
     // OpenCL v2.0 s6.12.5 - Every block variable declaration must have an
@@ -14829,7 +14830,7 @@ void Sema::FinalizeDeclaration(Decl *ThisDecl) {
     // variables whether they are local or not. CUDA also allows
     // constant initializers for __constant__ and __device__ variables.
     if (getLangOpts().CUDA)
-      checkAllowedCUDAInitializer(VD);
+      CUDA().checkAllowedInitializer(VD);
 
     // Grab the dllimport or dllexport attribute off of the VarDecl.
     const InheritableAttr *DLLAttr = getDLLAttr(VD);
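The checkAllowedInitializer call above enforces the constant-initializer rule mentioned in the comment; roughly (illustrative only):

    __device__ int a = 42;              // constant initializer: accepted
    __constant__ float pi = 3.14159f;   // accepted

    int host_init();
    // __device__ int b = host_init();  // rejected: device-side variables
    //                                  // may not be dynamically initialized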
@@ -20666,7 +20667,7 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(const FunctionDecl *FD,
     // when compiling for host, device and global functions are never emitted.
     // (Technically, we do emit a host-side stub for global functions, but this
     // doesn't count for our purposes here.)
-    CUDAFunctionTarget T = IdentifyCUDATarget(FD);
+    CUDAFunctionTarget T = CUDA().IdentifyTarget(FD);
     if (LangOpts.CUDAIsDevice && T == CUDAFunctionTarget::Host)
       return FunctionEmissionStatus::CUDADiscarded;
     if (!LangOpts.CUDAIsDevice &&
@@ -20691,5 +20692,5 @@ bool Sema::shouldIgnoreInHostDeviceCheck(FunctionDecl *Callee) {
   // for host, only HD functions actually called from the host get marked as
   // known-emitted.
   return LangOpts.CUDA && !LangOpts.CUDAIsDevice &&
-         IdentifyCUDATarget(Callee) == CUDAFunctionTarget::Global;
+         CUDA().IdentifyTarget(Callee) == CUDAFunctionTarget::Global;
 }
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 56c9d90c9b52b..b7b1fbc625a15 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -39,6 +39,7 @@
 #include "clang/Sema/ParsedAttr.h"
 #include "clang/Sema/Scope.h"
 #include "clang/Sema/ScopeInfo.h"
+#include "clang/Sema/SemaCUDA.h"
 #include "clang/Sema/SemaHLSL.h"
 #include "clang/Sema/SemaInternal.h"
 #include "llvm/ADT/STLExtras.h"
@@ -5099,8 +5100,8 @@ static void handleSharedAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
     return;
   }
   if (S.getLangOpts().CUDA && VD->hasLocalStorage() &&
-      S.CUDADiagIfHostCode(AL.getLoc(), diag::err_cuda_host_shared)
-          << llvm::to_underlying(S.CurrentCUDATarget()))
+      S.CUDA().DiagIfHostCode(AL.getLoc(), diag::err_cuda_host_shared)
+          << llvm::to_underlying(S.CUDA().CurrentTarget()))
     return;
   D->addAttr(::new (S.Context) CUDASharedAttr(S.Context, AL));
 }
@@ -5189,8 +5190,9 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   // Diagnostic is emitted elsewhere: here we store the (valid) AL
   // in the Decl node for syntactic reasoning, e.g., pretty-printing.
   CallingConv CC;
-  if (S.CheckCallingConvAttr(AL, CC, /*FD*/ nullptr,
-                             S.IdentifyCUDATarget(dyn_cast<FunctionDecl>(D))))
+  if (S.CheckCallingConvAttr(
+          AL, CC, /*FD*/ nullptr,
+          S.CUDA().IdentifyTarget(dyn_cast<FunctionDecl>(D))))
     return;
 
   if (!isa<ObjCMethodDecl>(D)) {
@@ -5495,7 +5497,7 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC,
   if (LangOpts.CUDA) {
     auto *Aux = Context.getAuxTargetInfo();
     assert(FD || CFT != CUDAFunctionTarget::InvalidTarget);
-    auto CudaTarget = FD ? IdentifyCUDATarget(FD) : CFT;
+    auto CudaTarget = FD ? CUDA().IdentifyTarget(FD) : CFT;
     bool CheckHost = false, CheckDevice = false;
     switch (CudaTarget) {
     case CUDAFunctionTarget::HostDevice:
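And the rule handleSharedAttr checks through DiagIfHostCode, in user code (illustrative only):

    __global__ void kernel() {
      __shared__ int tile[256];    // OK: local __shared__ in device code
    }

    void host_fn() {
      // __shared__ int bad[256];  // err_cuda_host_shared if uncommented:
      //                           // a local __shared__ variable has no
      //                           // meaning in a __host__ function
    }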
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 51c14443d2d8f..1fe10375222c5 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -42,6 +42,7 @@
 #include "clang/Sema/ParsedTemplate.h"
 #include "clang/Sema/Scope.h"
 #include "clang/Sema/ScopeInfo.h"
+#include "clang/Sema/SemaCUDA.h"
 #include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/Template.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -9876,15 +9877,15 @@ bool Sema::ShouldDeleteSpecialMember(CXXMethodDecl *MD,
     //   failed.
     // For inherited constructors (non-null ICI), CSM may be passed so that MD
     // is treated as certain special member, which may not reflect what special
-    // member MD really is. However inferCUDATargetForImplicitSpecialMember
+    // member MD really is. However inferTargetForImplicitSpecialMember
     // expects CSM to match MD, therefore recalculate CSM.
     assert(ICI || CSM == getSpecialMember(MD));
     auto RealCSM = CSM;
     if (ICI)
       RealCSM = getSpecialMember(MD);
 
-    return inferCUDATargetForImplicitSpecialMember(RD, RealCSM, MD,
-                                                   SMI.ConstArg, Diagnose);
+    return CUDA().inferTargetForImplicitSpecialMember(RD, RealCSM, MD,
+                                                      SMI.ConstArg, Diagnose);
   }
 
   return false;
@@ -14055,7 +14056,7 @@ CXXConstructorDecl *Sema::DeclareImplicitDefaultConstructor(
   setupImplicitSpecialMemberType(DefaultCon, Context.VoidTy, std::nullopt);
 
   if (getLangOpts().CUDA)
-    inferCUDATargetForImplicitSpecialMember(
+    CUDA().inferTargetForImplicitSpecialMember(
         ClassDecl, CXXSpecialMemberKind::DefaultConstructor, DefaultCon,
         /* ConstRHS */ false,
         /* Diagnose */ false);
@@ -14341,7 +14342,7 @@ CXXDestructorDecl *Sema::DeclareImplicitDestructor(CXXRecordDecl *ClassDecl) {
   setupImplicitSpecialMemberType(Destructor, Context.VoidTy, std::nullopt);
 
   if (getLangOpts().CUDA)
-    inferCUDATargetForImplicitSpecialMember(
+    CUDA().inferTargetForImplicitSpecialMember(
         ClassDecl, CXXSpecialMemberKind::Destructor, Destructor,
         /* ConstRHS */ false,
         /* Diagnose */ false);
@@ -14983,7 +14984,7 @@ CXXMethodDecl *Sema::DeclareImplicitCopyAssignment(CXXRecordDecl *ClassDecl) {
   setupImplicitSpecialMemberType(CopyAssignment, RetType, ArgType);
 
   if (getLangOpts().CUDA)
-    inferCUDATargetForImplicitSpecialMember(
+    CUDA().inferTargetForImplicitSpecialMember(
         ClassDecl, CXXSpecialMemberKind::CopyAssignment, CopyAssignment,
         /* ConstRHS */ Const,
         /* Diagnose */ false);
@@ -15335,7 +15336,7 @@ CXXMethodDecl *Sema::DeclareImplicitMoveAssignment(CXXRecordDecl *ClassDecl) {
   setupImplicitSpecialMemberType(MoveAssignment, RetType, ArgType);
 
   if (getLangOpts().CUDA)
-    inferCUDATargetForImplicitSpecialMember(
+    CUDA().inferTargetForImplicitSpecialMember(
         ClassDecl, CXXSpecialMemberKind::MoveAssignment, MoveAssignment,
         /* ConstRHS */ false,
         /* Diagnose */ false);
@@ -15733,7 +15734,7 @@ CXXConstructorDecl *Sema::DeclareImplicitCopyConstructor(
   setupImplicitSpecialMemberType(CopyConstructor, Context.VoidTy, ArgType);
 
   if (getLangOpts().CUDA)
-    inferCUDATargetForImplicitSpecialMember(
+    CUDA().inferTargetForImplicitSpecialMember(
         ClassDecl, CXXSpecialMemberKind::CopyConstructor, CopyConstructor,
         /* ConstRHS */ Const,
         /* Diagnose */ false);
@@ -15878,7 +15879,7 @@ CXXConstructorDecl *Sema::DeclareImplicitMoveConstructor(
   setupImplicitSpecialMemberType(MoveConstructor, Context.VoidTy, ArgType);
 
   if (getLangOpts().CUDA)
-    inferCUDATargetForImplicitSpecialMember(
+    CUDA().inferTargetForImplicitSpecialMember(
         ClassDecl, CXXSpecialMemberKind::MoveConstructor, MoveConstructor,
         /* ConstRHS */ false,
         /* Diagnose */ false);
@@ -16184,7 +16185,7 @@ ExprResult Sema::BuildCXXConstructExpr(
           DeclInitType->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) &&
          "given constructor for wrong type");
   MarkFunctionReferenced(ConstructLoc, Constructor);
-  if (getLangOpts().CUDA && !CheckCUDACall(ConstructLoc, Constructor))
+  if (getLangOpts().CUDA && !CUDA().CheckCall(ConstructLoc, Constructor))
     return ExprError();
 
   return CheckForImmediateInvocation(
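Each DeclareImplicit* hook above runs target inference for the corresponding implicit special member; roughly what that inference does, with hypothetical types (illustrative only):

    struct DeviceOnly {
      __device__ DeviceOnly();   // only a device-side default constructor
    };

    struct Wrapper {
      DeviceOnly d;
      // Wrapper's implicit default constructor must call DeviceOnly's
      // __device__ constructor, so it is inferred __device__ as well;
      // default-constructing a Wrapper in host code is then diagnosed.
    };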
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index b294d2bd9f53f..823bf36d88bc9 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -49,6 +49,7 @@
 #include "clang/Sema/ParsedTemplate.h"
 #include "clang/Sema/Scope.h"
 #include "clang/Sema/ScopeInfo.h"
+#include "clang/Sema/SemaCUDA.h"
 #include "clang/Sema/SemaFixItUtils.h"
 #include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/Template.h"
@@ -308,7 +309,7 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef<SourceLocation> Locs,
         DeduceReturnType(FD, Loc))
       return true;
 
-    if (getLangOpts().CUDA && !CheckCUDACall(Loc, FD))
+    if (getLangOpts().CUDA && !CUDA().CheckCall(Loc, FD))
       return true;
   }
 
@@ -17307,7 +17308,7 @@ ExprResult Sema::BuildVAArgExpr(SourceLocation BuiltinLoc,
   // CUDA device code does not support varargs.
   if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
     if (const FunctionDecl *F = dyn_cast<FunctionDecl>(CurContext)) {
-      CUDAFunctionTarget T = IdentifyCUDATarget(F);
+      CUDAFunctionTarget T = CUDA().IdentifyTarget(F);
       if (T == CUDAFunctionTarget::Global || T == CUDAFunctionTarget::Device ||
           T == CUDAFunctionTarget::HostDevice)
         return ExprError(Diag(E->getBeginLoc(), diag::err_va_arg_in_device));
@@ -18961,7 +18962,7 @@ void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func,
   checkSpecializationReachability(Loc, Func);
 
   if (getLangOpts().CUDA)
-    CheckCUDACall(Loc, Func);
+    CUDA().CheckCall(Loc, Func);
 
   // If we need a definition, try to create one.
   if (NeedDefinition && !Func->getBody()) {
@@ -19108,7 +19109,7 @@ void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func,
   // side. Therefore keep trying until it is recorded.
   if (LangOpts.OffloadImplicitHostDeviceTemplates && LangOpts.CUDAIsDevice &&
       !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Func))
-    CUDARecordImplicitHostDeviceFuncUsedByDevice(Func);
+    CUDA().RecordImplicitHostDeviceFuncUsedByDevice(Func);
 
   // If this is the first "real" use, act on that.
   if (OdrUse == OdrUseContext::Used && !Func->isUsed(/*CheckUsedAttr=*/false)) {
@@ -19181,9 +19182,9 @@ MarkVarDeclODRUsed(ValueDecl *V, SourceLocation Loc, Sema &SemaRef,
 
   if (SemaRef.LangOpts.CUDA && Var->hasGlobalStorage()) {
     auto *FD = dyn_cast_or_null<FunctionDecl>(SemaRef.CurContext);
-    auto VarTarget = SemaRef.IdentifyCUDATarget(Var);
-    auto UserTarget = SemaRef.IdentifyCUDATarget(FD);
-    if (VarTarget == Sema::CVT_Host &&
+    auto VarTarget = SemaRef.CUDA().IdentifyTarget(Var);
+    auto UserTarget = SemaRef.CUDA().IdentifyTarget(FD);
+    if (VarTarget == SemaCUDA::CVT_Host &&
        (UserTarget == CUDAFunctionTarget::Device ||
         UserTarget == CUDAFunctionTarget::HostDevice ||
         UserTarget == CUDAFunctionTarget::Global)) {
@@ -19199,7 +19200,7 @@ MarkVarDeclODRUsed(ValueDecl *V, SourceLocation Loc, Sema &SemaRef,
               ? diag::note_cuda_const_var_unpromoted
               : diag::note_cuda_host_var);
     }
-  } else if (VarTarget == Sema::CVT_Device &&
+  } else if (VarTarget == SemaCUDA::CVT_Device &&
              !Var->hasAttr<CUDASharedAttr>() &&
              (UserTarget == CUDAFunctionTarget::Host ||
               UserTarget == CUDAFunctionTarget::HostDevice)) {
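MarkVarDeclODRUsed above distinguishes host-side (CVT_Host) from device-side (CVT_Device) variables; the effect in user code (illustrative only):

    int HostVar;                // CVT_Host: exists on the host side only
    __device__ int DeviceVar;   // CVT_Device: emitted on the device

    __global__ void k(int *out) {
      // *out = HostVar;        // error: host variable referenced from a
      //                        // __global__ function
      *out = DeviceVar;         // OK
    }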
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index ce9d5c26e2185..8911257a6f614 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -38,6 +38,7 @@
 #include "clang/Sema/ParsedTemplate.h"
 #include "clang/Sema/Scope.h"
 #include "clang/Sema/ScopeInfo.h"
+#include "clang/Sema/SemaCUDA.h"
 #include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/SemaLambda.h"
 #include "clang/Sema/Template.h"
@@ -884,8 +885,8 @@ ExprResult Sema::BuildCXXThrow(SourceLocation OpLoc, Expr *Ex,
 
   // Exceptions aren't allowed in CUDA device code.
   if (getLangOpts().CUDA)
-    CUDADiagIfDeviceCode(OpLoc, diag::err_cuda_device_exceptions)
-        << "throw" << llvm::to_underlying(CurrentCUDATarget());
+    CUDA().DiagIfDeviceCode(OpLoc, diag::err_cuda_device_exceptions)
+        << "throw" << llvm::to_underlying(CUDA().CurrentTarget());
 
   if (getCurScope() && getCurScope()->isOpenMPSimdDirectiveScope())
     Diag(OpLoc, diag::err_omp_simd_region_cannot_use_stmt) << "throw";
@@ -1708,17 +1709,17 @@ bool Sema::isUsualDeallocationFunction(const CXXMethodDecl *Method) {
   // [CUDA] Ignore this function, if we can't call it.
   const FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
   if (getLangOpts().CUDA) {
-    auto CallPreference = IdentifyCUDAPreference(Caller, Method);
+    auto CallPreference = CUDA().IdentifyPreference(Caller, Method);
     // If it's not callable at all, it's not the right function.
-    if (CallPreference < CFP_WrongSide)
+    if (CallPreference < SemaCUDA::CFP_WrongSide)
       return false;
-    if (CallPreference == CFP_WrongSide) {
+    if (CallPreference == SemaCUDA::CFP_WrongSide) {
       // Maybe. We have to check if there are better alternatives.
       DeclContext::lookup_result R =
           Method->getDeclContext()->lookup(Method->getDeclName());
       for (const auto *D : R) {
         if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
-          if (IdentifyCUDAPreference(Caller, FD) > CFP_WrongSide)
+          if (CUDA().IdentifyPreference(Caller, FD) > SemaCUDA::CFP_WrongSide)
             return false;
         }
       }
@@ -1737,7 +1738,7 @@ bool Sema::isUsualDeallocationFunction(const CXXMethodDecl *Method) {
   return llvm::none_of(PreventedBy, [&](const FunctionDecl *FD) {
     assert(FD->getNumParams() == 1 &&
            "Only single-operand functions should be in PreventedBy");
-    return IdentifyCUDAPreference(Caller, FD) >= CFP_HostDevice;
+    return CUDA().IdentifyPreference(Caller, FD) >= SemaCUDA::CFP_HostDevice;
   });
 }
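The CUDAPref bookkeeping in these deallocation hunks ranks usual operator delete candidates by target; a sketch with a hypothetical type (illustrative only):

    struct S {
      __device__ void operator delete(void *p);
      __host__ void operator delete(void *p);
    };

    __device__ void destroy(S *p) {
      delete p;  // in device compilation the __host__ candidate is
                 // CFP_Never and is skipped by resolveDeallocationOverload,
                 // leaving the __device__ operator delete
    }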
@@ -1774,7 +1775,7 @@ namespace {
     UsualDeallocFnInfo(Sema &S, DeclAccessPair Found)
         : Found(Found), FD(dyn_cast<FunctionDecl>(Found->getUnderlyingDecl())),
           Destroying(false), HasSizeT(false), HasAlignValT(false),
-          CUDAPref(Sema::CFP_Native) {
+          CUDAPref(SemaCUDA::CFP_Native) {
       // A function template declaration is never a usual deallocation function.
       if (!FD)
         return;
@@ -1800,7 +1801,7 @@ namespace {
 
       // In CUDA, determine how much we'd like / dislike to call this.
       if (S.getLangOpts().CUDA)
-        CUDAPref = S.IdentifyCUDAPreference(
+        CUDAPref = S.CUDA().IdentifyPreference(
             S.getCurFunctionDecl(/*AllowLambda=*/true), FD);
     }
 
@@ -1831,7 +1832,7 @@ namespace {
     DeclAccessPair Found;
     FunctionDecl *FD;
     bool Destroying, HasSizeT, HasAlignValT;
-    Sema::CUDAFunctionPreference CUDAPref;
+    SemaCUDA::CUDAFunctionPreference CUDAPref;
   };
 }
 
@@ -1855,7 +1856,7 @@ static UsualDeallocFnInfo resolveDeallocationOverload(
   for (auto I = R.begin(), E = R.end(); I != E; ++I) {
     UsualDeallocFnInfo Info(S, I.getPair());
     if (!Info || !isNonPlacementDeallocationFunction(S, Info.FD) ||
-        Info.CUDAPref == Sema::CFP_Never)
+        Info.CUDAPref == SemaCUDA::CFP_Never)
       continue;
 
     if (!Best) {
@@ -2956,8 +2957,8 @@ bool Sema::FindAllocationFunctions(SourceLocation StartLoc, SourceRange Range,
     }
 
     if (getLangOpts().CUDA)
-      EraseUnwantedCUDAMatches(getCurFunctionDecl(/*AllowLambda=*/true),
-                               Matches);
+      CUDA().EraseUnwantedMatches(getCurFunctionDecl(/*AllowLambda=*/true),
+                                  Matches);
   } else {
     // C++1y [expr.new]p22:
     //   For a non-placement allocation function, the normal deallocation
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index 5b95bae567b72..35a51c6c2328d 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -9,17 +9,18 @@
 // This file implements semantic analysis for C++ lambda expressions.
 //
 //===----------------------------------------------------------------------===//
-#include "clang/Sema/DeclSpec.h"
+#include "clang/Sema/SemaLambda.h"
 #include "TypeLocBuilder.h"
 #include "clang/AST/ASTLambda.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/Basic/TargetInfo.h"
+#include "clang/Sema/DeclSpec.h"
 #include "clang/Sema/Initialization.h"
 #include "clang/Sema/Lookup.h"
 #include "clang/Sema/Scope.h"
 #include "clang/Sema/ScopeInfo.h"
+#include "clang/Sema/SemaCUDA.h"
 #include "clang/Sema/SemaInternal.h"
-#include "clang/Sema/SemaLambda.h"
 #include "clang/Sema/Template.h"
 #include "llvm/ADT/STLExtras.h"
 #include <optional>
@@ -1393,7 +1394,7 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro,
 
   // CUDA lambdas get implicit host and device attributes.
   if (getLangOpts().CUDA)
-    CUDASetLambdaAttrs(Method);
+    CUDA().SetLambdaAttrs(Method);
 
   // OpenMP lambdas might get assumumption attributes.
   if (LangOpts.OpenMP)
@@ -2136,7 +2137,7 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
       CaptureInits.push_back(Init.get());
 
       if (LangOpts.CUDA)
-        CUDACheckLambdaCapture(CallOperator, From);
+        CUDA().CheckLambdaCapture(CallOperator, From);
     }
 
     Class->setCaptures(Context, Captures);
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index e1155dc2d5d28..397e7681828f3 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -31,6 +31,7 @@
 #include "clang/Sema/Initialization.h"
 #include "clang/Sema/Lookup.h"
 #include "clang/Sema/Overload.h"
+#include "clang/Sema/SemaCUDA.h"
 #include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/Template.h"
 #include "clang/Sema/TemplateDeduction.h"
@@ -1549,8 +1550,8 @@ static bool IsOverloadOrOverrideImpl(Sema &SemaRef, FunctionDecl *New,
     // Don't allow overloading of destructors.  (In theory we could, but it
     // would be a giant change to clang.)
     if (!isa<CXXDestructorDecl>(New)) {
-      CUDAFunctionTarget NewTarget = SemaRef.IdentifyCUDATarget(New),
-                         OldTarget = SemaRef.IdentifyCUDATarget(Old);
+      CUDAFunctionTarget NewTarget = SemaRef.CUDA().IdentifyTarget(New),
+                         OldTarget = SemaRef.CUDA().IdentifyTarget(Old);
       if (NewTarget != CUDAFunctionTarget::InvalidTarget) {
         assert((OldTarget != CUDAFunctionTarget::InvalidTarget) &&
                "Unexpected invalid target.");
@@ -7100,7 +7101,7 @@ void Sema::AddOverloadCandidate(
     // inferred for the member automatically, based on the bases and fields of
     // the class.
     if (!(Caller && Caller->isImplicit()) &&
-        !IsAllowedCUDACall(Caller, Function)) {
+        !CUDA().IsAllowedCall(Caller, Function)) {
       Candidate.Viable = false;
       Candidate.FailureKind = ovl_fail_bad_target;
       return;
@@ -7618,7 +7619,8 @@ Sema::AddMethodCandidate(CXXMethodDecl *Method, DeclAccessPair FoundDecl,
 
   // (CUDA B.1): Check for invalid calls between targets.
   if (getLangOpts().CUDA)
-    if (!IsAllowedCUDACall(getCurFunctionDecl(/*AllowLambda=*/true), Method)) {
+    if (!CUDA().IsAllowedCall(getCurFunctionDecl(/*AllowLambda=*/true),
+                              Method)) {
       Candidate.Viable = false;
       Candidate.FailureKind = ovl_fail_bad_target;
       return;
@@ -10440,7 +10442,7 @@ bool clang::isBetterOverloadCandidate(
   // If other rules cannot determine which is better, CUDA preference will be
   // used again to determine which is better.
   //
-  // TODO: Currently IdentifyCUDAPreference does not return correct values
+  // TODO: Currently IdentifyPreference does not return correct values
   // for functions called in global variable initializers due to missing
   // correct context about device/host. Therefore we can only enforce this
   // rule when there is a caller. We should enforce this rule for functions
@@ -10452,14 +10454,14 @@ bool clang::isBetterOverloadCandidate(
   if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function &&
       S.getLangOpts().GPUExcludeWrongSideOverloads) {
     if (FunctionDecl *Caller = S.getCurFunctionDecl(/*AllowLambda=*/true)) {
-      bool IsCallerImplicitHD = Sema::isCUDAImplicitHostDeviceFunction(Caller);
+      bool IsCallerImplicitHD = SemaCUDA::isImplicitHostDeviceFunction(Caller);
       bool IsCand1ImplicitHD =
-          Sema::isCUDAImplicitHostDeviceFunction(Cand1.Function);
+          SemaCUDA::isImplicitHostDeviceFunction(Cand1.Function);
       bool IsCand2ImplicitHD =
-          Sema::isCUDAImplicitHostDeviceFunction(Cand2.Function);
-      auto P1 = S.IdentifyCUDAPreference(Caller, Cand1.Function);
-      auto P2 = S.IdentifyCUDAPreference(Caller, Cand2.Function);
-      assert(P1 != Sema::CFP_Never && P2 != Sema::CFP_Never);
+          SemaCUDA::isImplicitHostDeviceFunction(Cand2.Function);
+      auto P1 = S.CUDA().IdentifyPreference(Caller, Cand1.Function);
+      auto P2 = S.CUDA().IdentifyPreference(Caller, Cand2.Function);
+      assert(P1 != SemaCUDA::CFP_Never && P2 != SemaCUDA::CFP_Never);
       // The implicit HD function may be a function in a system header which
       // is forced by pragma. In device compilation, if we prefer HD candidates
       // over wrong-sided candidates, overloading resolution may change, which
@@ -10473,8 +10475,8 @@ bool clang::isBetterOverloadCandidate(
       auto EmitThreshold =
           (S.getLangOpts().CUDAIsDevice && IsCallerImplicitHD &&
            (IsCand1ImplicitHD || IsCand2ImplicitHD))
-              ? Sema::CFP_Never
-              : Sema::CFP_WrongSide;
+              ? SemaCUDA::CFP_Never
+              : SemaCUDA::CFP_WrongSide;
       auto Cand1Emittable = P1 > EmitThreshold;
       auto Cand2Emittable = P2 > EmitThreshold;
       if (Cand1Emittable && !Cand2Emittable)
         return true;
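IsAllowedCall, consulted by AddOverloadCandidate and AddMethodCandidate above, removes cross-target candidates from overload resolution outright; for example (illustrative only):

    __device__ void f(int);
    __host__ void f(double);

    __device__ void use() {
      f(1.0);  // the __host__ overload is not viable here
               // (ovl_fail_bad_target), so this resolves to the __device__
               // f(int) despite the worse conversion
    }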
@@ -10758,8 +10760,8 @@ bool clang::isBetterOverloadCandidate(
   // to determine which is better.
   if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
     FunctionDecl *Caller = S.getCurFunctionDecl(/*AllowLambda=*/true);
-    return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
-           S.IdentifyCUDAPreference(Caller, Cand2.Function);
+    return S.CUDA().IdentifyPreference(Caller, Cand1.Function) >
+           S.CUDA().IdentifyPreference(Caller, Cand2.Function);
   }
 
   // General member function overloading is handled above, so this only handles
@@ -10891,15 +10893,15 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc,
         llvm::any_of(Candidates, [&](OverloadCandidate *Cand) {
           // Check viable function only.
           return Cand->Viable && Cand->Function &&
-                 S.IdentifyCUDAPreference(Caller, Cand->Function) ==
-                     Sema::CFP_SameSide;
+                 S.CUDA().IdentifyPreference(Caller, Cand->Function) ==
+                     SemaCUDA::CFP_SameSide;
         });
     if (ContainsSameSideCandidate) {
       auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) {
         // Check viable function only to avoid unnecessary data copying/moving.
         return Cand->Viable && Cand->Function &&
-               S.IdentifyCUDAPreference(Caller, Cand->Function) ==
-                   Sema::CFP_WrongSide;
+               S.CUDA().IdentifyPreference(Caller, Cand->Function) ==
+                   SemaCUDA::CFP_WrongSide;
       };
       llvm::erase_if(Candidates, IsWrongSideCandidate);
     }
@@ -11938,8 +11940,8 @@ static void DiagnoseBadTarget(Sema &S, OverloadCandidate *Cand) {
   FunctionDecl *Caller = S.getCurFunctionDecl(/*AllowLambda=*/true);
   FunctionDecl *Callee = Cand->Function;
 
-  CUDAFunctionTarget CallerTarget = S.IdentifyCUDATarget(Caller),
-                     CalleeTarget = S.IdentifyCUDATarget(Callee);
+  CUDAFunctionTarget CallerTarget = S.CUDA().IdentifyTarget(Caller),
+                     CalleeTarget = S.CUDA().IdentifyTarget(Callee);
 
   std::string FnDesc;
   std::pair<OverloadCandidateKind, OverloadCandidateSelect> FnKindPair =
@@ -11986,9 +11988,9 @@ static void DiagnoseBadTarget(Sema &S, OverloadCandidate *Cand) {
       }
     }
 
-    S.inferCUDATargetForImplicitSpecialMember(ParentClass, CSM, Meth,
-                                              /* ConstRHS */ ConstRHS,
-                                              /* Diagnose */ true);
+    S.CUDA().inferTargetForImplicitSpecialMember(ParentClass, CSM, Meth,
+                                                 /* ConstRHS */ ConstRHS,
+                                                 /* Diagnose */ true);
   }
 }
 
@@ -13060,7 +13062,7 @@ class AddressOfFunctionResolver {
       if (S.getLangOpts().CUDA) {
         FunctionDecl *Caller = S.getCurFunctionDecl(/*AllowLambda=*/true);
         if (!(Caller && Caller->isImplicit()) &&
-            !S.IsAllowedCUDACall(Caller, FunDecl))
+            !S.CUDA().IsAllowedCall(Caller, FunDecl))
           return false;
       }
       if (FunDecl->isMultiVersion()) {
@@ -13180,8 +13182,8 @@ class AddressOfFunctionResolver {
   }
 
   void EliminateSuboptimalCudaMatches() {
-    S.EraseUnwantedCUDAMatches(S.getCurFunctionDecl(/*AllowLambda=*/true),
-                               Matches);
+    S.CUDA().EraseUnwantedMatches(S.getCurFunctionDecl(/*AllowLambda=*/true),
+                                  Matches);
   }
 
 public:
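And the preference ordering used by isBetterOverloadCandidate / BestViableFunction above, shown with the constexpr case that clang's implicit host-device rules permit (illustrative only):

    constexpr int pick() { return 0; }   // implicitly __host__ __device__
    __device__ int pick() { return 1; }  // allowed to overload the above

    __device__ int use() {
      return pick();  // CFP_Native (__device__ -> __device__) outranks
                      // CFP_HostDevice, so the __device__ overload wins here
    }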
@@ -13335,8 +13337,8 @@ Sema::resolveAddressOfSingleOverloadCandidate(Expr *E, DeclAccessPair &Pair) {
   // Return positive for better, negative for worse, 0 for equal preference.
   auto CheckCUDAPreference = [&](FunctionDecl *FD1, FunctionDecl *FD2) {
     FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
-    return static_cast<int>(IdentifyCUDAPreference(Caller, FD1)) -
-           static_cast<int>(IdentifyCUDAPreference(Caller, FD2));
+    return static_cast<int>(CUDA().IdentifyPreference(Caller, FD1)) -
+           static_cast<int>(CUDA().IdentifyPreference(Caller, FD2));
   };
 
   auto CheckMoreConstrained = [&](FunctionDecl *FD1,
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index 1c2f6120f6218..d28c24cfdfd33 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -33,6 +33,7 @@
 #include "clang/Sema/Ownership.h"
 #include "clang/Sema/Scope.h"
 #include "clang/Sema/ScopeInfo.h"
+#include "clang/Sema/SemaCUDA.h"
 #include "clang/Sema/SemaInternal.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
@@ -4574,8 +4575,8 @@ StmtResult Sema::ActOnCXXTryBlock(SourceLocation TryLoc, Stmt *TryBlock,
 
   // Exceptions aren't allowed in CUDA device code.
   if (getLangOpts().CUDA)
-    CUDADiagIfDeviceCode(TryLoc, diag::err_cuda_device_exceptions)
-        << "try" << llvm::to_underlying(CurrentCUDATarget());
+    CUDA().DiagIfDeviceCode(TryLoc, diag::err_cuda_device_exceptions)
+        << "try" << llvm::to_underlying(CUDA().CurrentTarget());
 
   if (getCurScope() && getCurScope()->isOpenMPSimdDirectiveScope())
     Diag(TryLoc, diag::err_omp_simd_region_cannot_use_stmt) << "try";
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index e0f5e53dc2481..95171359f0ab1 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -33,6 +33,7 @@
 #include "clang/Sema/Overload.h"
 #include "clang/Sema/ParsedTemplate.h"
 #include "clang/Sema/Scope.h"
+#include "clang/Sema/SemaCUDA.h"
 #include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/Template.h"
 #include "clang/Sema/TemplateDeduction.h"
@@ -10155,9 +10156,9 @@ bool Sema::CheckFunctionTemplateSpecialization(
     //        take target attributes into account, we reject candidates
     //        here that have a different target.
     if (LangOpts.CUDA &&
-        IdentifyCUDATarget(Specialization,
-                           /* IgnoreImplicitHDAttr = */ true) !=
-            IdentifyCUDATarget(FD, /* IgnoreImplicitHDAttr = */ true)) {
+        CUDA().IdentifyTarget(Specialization,
+                              /* IgnoreImplicitHDAttr = */ true) !=
+            CUDA().IdentifyTarget(FD, /* IgnoreImplicitHDAttr = */ true)) {
       FailedCandidates.addCandidate().set(
           I.getPair(), FunTmpl->getTemplatedDecl(),
           MakeDeductionFailureInfo(
@@ -10328,7 +10329,7 @@ bool Sema::CheckFunctionTemplateSpecialization(
   //   virtue e.g. of being constexpr, and it passes these implicit
   //   attributes on to its specializations.)
   if (LangOpts.CUDA)
-    inheritCUDATargetAttrs(FD, *Specialization->getPrimaryTemplate());
+    CUDA().inheritTargetAttrs(FD, *Specialization->getPrimaryTemplate());
 
   // The "previous declaration" for this function template specialization is
   // the prior function template specialization.
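What the target comparison in the specialization hunks accepts and rejects, roughly (illustrative only):

    template <typename T> __device__ void tfn(T);

    template <> __device__ void tfn<int>(int) {}  // target matches: accepted

    // A candidate whose target differs from the template's is rejected in
    // CheckFunctionTemplateSpecialization / ActOnExplicitInstantiation:
    // template <> __host__ void tfn<float>(float) {}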
@@ -11364,9 +11365,9 @@ DeclResult Sema::ActOnExplicitInstantiation(Scope *S,
     //        target attributes into account, we reject candidates here that
     //        have a different target.
     if (LangOpts.CUDA &&
-        IdentifyCUDATarget(Specialization,
-                           /* IgnoreImplicitHDAttr = */ true) !=
-            IdentifyCUDATarget(D.getDeclSpec().getAttributes())) {
+        CUDA().IdentifyTarget(Specialization,
+                              /* IgnoreImplicitHDAttr = */ true) !=
+            CUDA().IdentifyTarget(D.getDeclSpec().getAttributes())) {
       FailedCandidates.addCandidate().set(
           P.getPair(), FunTmpl->getTemplatedDecl(),
           MakeDeductionFailureInfo(
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 7f510d34d671e..c0469a47ab8b6 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -26,6 +26,7 @@
 #include "clang/Sema/Initialization.h"
 #include "clang/Sema/Lookup.h"
 #include "clang/Sema/ScopeInfo.h"
+#include "clang/Sema/SemaCUDA.h"
 #include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/Template.h"
 #include "clang/Sema/TemplateInstCallback.h"
@@ -5537,7 +5538,7 @@ void Sema::InstantiateVariableInitializer(
   }
 
   if (getLangOpts().CUDA)
-    checkAllowedCUDAInitializer(Var);
+    CUDA().checkAllowedInitializer(Var);
 }
 
 /// Instantiate the definition of the given variable from its
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index b3f6078952f6e..404c4e8e31b55 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -33,6 +33,7 @@
 #include "clang/Sema/Lookup.h"
 #include "clang/Sema/ParsedTemplate.h"
 #include "clang/Sema/ScopeInfo.h"
+#include "clang/Sema/SemaCUDA.h"
 #include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/Template.h"
 #include "clang/Sema/TemplateInstCallback.h"
@@ -2735,7 +2736,7 @@ QualType Sema::BuildArrayType(QualType T, ArraySizeModifier ASM,
       bool IsCUDADevice = (getLangOpts().CUDA && getLangOpts().CUDAIsDevice);
       targetDiag(Loc,
                  IsCUDADevice ? diag::err_cuda_vla : diag::err_vla_unsupported)
-          << (IsCUDADevice ? llvm::to_underlying(CurrentCUDATarget()) : 0);
+          << (IsCUDADevice ? llvm::to_underlying(CUDA().CurrentTarget()) : 0);
     } else if (sema::FunctionScopeInfo *FSI = getCurFunction()) {
       // VLAs are supported on this target, but we may need to do delayed
       // checking that the VLA is not being used within a coroutine.
@@ -3618,7 +3619,7 @@ static QualType GetDeclSpecTypeForDeclarator(TypeProcessingState &state,
   //   D.getDeclarationAttributes()) because those are always C++11 attributes,
   //   and those don't get distributed.
   distributeTypeAttrsFromDeclarator(
-      state, T, SemaRef.IdentifyCUDATarget(D.getAttributes()));
+      state, T, SemaRef.CUDA().IdentifyTarget(D.getAttributes()));
 
   // Find the deduced type in this type. Look in the trailing return type if we
   // have one, otherwise in the DeclSpec type.
@@ -4139,7 +4140,7 @@ static CallingConv getCCForDeclaratorChunk(
     // handleFunctionTypeAttr.
     CallingConv CC;
     if (!S.CheckCallingConvAttr(AL, CC, /*FunctionDecl=*/nullptr,
-                                S.IdentifyCUDATarget(D.getAttributes())) &&
+                                S.CUDA().IdentifyTarget(D.getAttributes())) &&
         (!FTI.isVariadic || supportsVariadicCall(CC))) {
       return CC;
     }
@@ -5825,7 +5826,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
 
     // See if there are any attributes on this declarator chunk.
     processTypeAttrs(state, T, TAL_DeclChunk, DeclType.getAttrs(),
-                     S.IdentifyCUDATarget(D.getAttributes()));
+                     S.CUDA().IdentifyTarget(D.getAttributes()));
 
     if (DeclType.Kind != DeclaratorChunk::Paren) {
       if (ExpectNoDerefChunk && !IsNoDerefableChunk(DeclType))
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index ce96ce2bdbcce..c468884748387 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -78,6 +78,7 @@
 #include "clang/Sema/ObjCMethodList.h"
 #include "clang/Sema/Scope.h"
 #include "clang/Sema/Sema.h"
+#include "clang/Sema/SemaCUDA.h"
 #include "clang/Sema/Weak.h"
 #include "clang/Serialization/ASTBitCodes.h"
 #include "clang/Serialization/ASTDeserializationListener.h"
@@ -3995,7 +3996,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
       if (Record.size() != 1)
         return llvm::createStringError(std::errc::illegal_byte_sequence,
                                        "invalid cuda pragma options record");
-      ForceCUDAHostDeviceDepth = Record[0];
+      ForceHostDeviceDepth = Record[0];
       break;
 
     case ALIGN_PACK_PRAGMA_OPTIONS: {
@@ -8274,7 +8275,7 @@ void ASTReader::UpdateSema() {
         PragmaMSPointersToMembersState,
         PointersToMembersPragmaLocation);
   }
-  SemaObj->ForceCUDAHostDeviceDepth = ForceCUDAHostDeviceDepth;
+  SemaObj->CUDA().ForceHostDeviceDepth = ForceHostDeviceDepth;
 
   if (PragmaAlignPackCurrentValue) {
     // The bottom of the stack might have a default value. It must be adjusted
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index d9ba10ab60878..88f93feaf5cb0 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -65,6 +65,7 @@
 #include "clang/Sema/IdentifierResolver.h"
 #include "clang/Sema/ObjCMethodList.h"
 #include "clang/Sema/Sema.h"
+#include "clang/Sema/SemaCUDA.h"
 #include "clang/Sema/Weak.h"
 #include "clang/Serialization/ASTBitCodes.h"
 #include "clang/Serialization/ASTReader.h"
@@ -4335,8 +4336,8 @@ void ASTWriter::WriteOpenCLExtensions(Sema &SemaRef) {
   Stream.EmitRecord(OPENCL_EXTENSIONS, Record);
 }
 void ASTWriter::WriteCUDAPragmas(Sema &SemaRef) {
-  if (SemaRef.ForceCUDAHostDeviceDepth > 0) {
-    RecordData::value_type Record[] = {SemaRef.ForceCUDAHostDeviceDepth};
+  if (SemaRef.CUDA().ForceHostDeviceDepth > 0) {
+    RecordData::value_type Record[] = {SemaRef.CUDA().ForceHostDeviceDepth};
     Stream.EmitRecord(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH, Record);
   }
 }
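For reference, ForceHostDeviceDepth — serialized by the reader and writer hunks above — counts the nesting depth of the pragma below; while the depth is non-zero, functions are parsed as implicitly __host__ __device__ (illustrative only):

    #pragma clang force_cuda_host_device begin
    inline int forced(int x) { return x; }  // treated as __host__ __device__
    #pragma clang force_cuda_host_device end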