From 06178712668fe368f83d06d233710dbfd83c49ba Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Fri, 8 Sep 2023 22:04:58 +0000 Subject: [PATCH 1/2] [mlir][gpu] Deprecate gpu::Serialization* passes. Deprecate the `gpu-to-cubin` & `gpu-to-hsaco` passes in favor of the `TargetAttr` workflow. This patch removes remaining upstream uses of the aforementioned passes, including the option to use them in `mlir-opt`. A future patch will remove these passes entirely. --- .../mlir/Dialect/GPU/Transforms/Passes.h | 4 + mlir/include/mlir/InitAllPasses.h | 2 - .../SparseTensor/Pipelines/CMakeLists.txt | 8 -- .../Pipelines/SparseTensorPipelines.cpp | 10 ++- mlir/test/Conversion/GPUToCUDA/lit.local.cfg | 2 - .../GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir | 25 ------- mlir/test/Conversion/GPUToROCm/lit.local.cfg | 2 - .../lower-rocdl-kernel-to-hsaco.mlir | 25 ------- mlir/test/lib/Dialect/GPU/CMakeLists.txt | 11 --- .../GPU/TestConvertGPUKernelToCubin.cpp | 73 ------------------- .../GPU/TestConvertGPUKernelToHsaco.cpp | 72 ------------------ mlir/tools/mlir-opt/mlir-opt.cpp | 6 -- 12 files changed, 10 insertions(+), 230 deletions(-) delete mode 100644 mlir/test/Conversion/GPUToCUDA/lit.local.cfg delete mode 100644 mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir delete mode 100644 mlir/test/Conversion/GPUToROCm/lit.local.cfg delete mode 100644 mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir delete mode 100644 mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToCubin.cpp delete mode 100644 mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToHsaco.cpp diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h index 033e8755501f9..2a891a7d24f80 100644 --- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h @@ -134,14 +134,17 @@ class SerializeToBlobPass : public OperationPass { /// Register pass to serialize GPU kernel functions to a CUBIN binary /// 
annotation. +LLVM_DEPRECATED("use Target attributes instead", "") void registerGpuSerializeToCubinPass(); /// Register pass to serialize GPU kernel functions to a HSAco binary /// annotation. +LLVM_DEPRECATED("use Target attributes instead", "") void registerGpuSerializeToHsacoPass(); /// Create an instance of the GPU kernel function to CUBIN binary serialization /// pass with optLevel (default level 2). +LLVM_DEPRECATED("use Target attributes instead", "") std::unique_ptr createGpuSerializeToCubinPass(StringRef triple, StringRef chip, StringRef features, @@ -150,6 +153,7 @@ std::unique_ptr createGpuSerializeToCubinPass(StringRef triple, /// Create an instance of the GPU kernel function to HSAco binary serialization /// pass. +LLVM_DEPRECATED("use Target attributes instead", "") std::unique_ptr createGpuSerializeToHsacoPass(StringRef triple, StringRef arch, StringRef features, diff --git a/mlir/include/mlir/InitAllPasses.h b/mlir/include/mlir/InitAllPasses.h index 8f3f92ae43145..f7271737c66d1 100644 --- a/mlir/include/mlir/InitAllPasses.h +++ b/mlir/include/mlir/InitAllPasses.h @@ -65,8 +65,6 @@ inline void registerAllPasses() { bufferization::registerBufferizationPasses(); func::registerFuncPasses(); registerGPUPasses(); - registerGpuSerializeToCubinPass(); - registerGpuSerializeToHsacoPass(); registerLinalgPasses(); registerNVGPUPasses(); registerSparseTensorPasses(); diff --git a/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt b/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt index 3cf530abd744e..234a0d82babef 100644 --- a/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt +++ b/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt @@ -27,11 +27,3 @@ add_mlir_dialect_library(MLIRSparseTensorPipelines MLIRVectorToLLVM MLIRVectorTransforms ) - -if(MLIR_ENABLE_CUDA_RUNNER) - # Enable gpu-to-cubin pass. 
- target_compile_definitions(obj.MLIRSparseTensorPipelines - PRIVATE - MLIR_GPU_TO_CUBIN_PASS_ENABLE=1 - ) -endif() diff --git a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp index 24c4c4c43a93d..37f9e09d34c04 100644 --- a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp +++ b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp @@ -78,11 +78,13 @@ void mlir::sparse_tensor::buildSparseCompiler( // Finalize GPU code generation. if (gpuCodegen) { -#if MLIR_GPU_TO_CUBIN_PASS_ENABLE - pm.addNestedPass(createGpuSerializeToCubinPass( - options.gpuTriple, options.gpuChip, options.gpuFeatures)); -#endif + GpuNVVMAttachTargetOptions nvvmTargetOptions; + nvvmTargetOptions.triple = options.gpuTriple; + nvvmTargetOptions.chip = options.gpuChip; + nvvmTargetOptions.features = options.gpuFeatures; + pm.addPass(createGpuNVVMAttachTarget(nvvmTargetOptions)); pm.addPass(createGpuToLLVMConversionPass()); + pm.addPass(createGpuModuleToBinaryPass()); } pm.addPass(createReconcileUnrealizedCastsPass()); diff --git a/mlir/test/Conversion/GPUToCUDA/lit.local.cfg b/mlir/test/Conversion/GPUToCUDA/lit.local.cfg deleted file mode 100644 index bc470ccc5733a..0000000000000 --- a/mlir/test/Conversion/GPUToCUDA/lit.local.cfg +++ /dev/null @@ -1,2 +0,0 @@ -if not config.run_cuda_tests: - config.unsupported = True diff --git a/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir b/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir deleted file mode 100644 index 0a2ac552a7c6d..0000000000000 --- a/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir +++ /dev/null @@ -1,25 +0,0 @@ -// RUN: mlir-opt %s --test-gpu-to-cubin | FileCheck %s - -// CHECK: gpu.module @foo attributes {gpu.binary = "CUBIN"} -gpu.module @foo { - llvm.func @kernel(%arg0 : f32, %arg1 : !llvm.ptr) - // CHECK: attributes {gpu.kernel} - attributes { gpu.kernel } { - llvm.return - } -} - 
-// CHECK: gpu.module @bar attributes {gpu.binary = "CUBIN"} -gpu.module @bar { - // CHECK: func @kernel_a - llvm.func @kernel_a() - attributes { gpu.kernel } { - llvm.return - } - - // CHECK: func @kernel_b - llvm.func @kernel_b() - attributes { gpu.kernel } { - llvm.return - } -} diff --git a/mlir/test/Conversion/GPUToROCm/lit.local.cfg b/mlir/test/Conversion/GPUToROCm/lit.local.cfg deleted file mode 100644 index 2f5cc9f3bad97..0000000000000 --- a/mlir/test/Conversion/GPUToROCm/lit.local.cfg +++ /dev/null @@ -1,2 +0,0 @@ -if not config.run_rocm_tests: - config.unsupported = True diff --git a/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir b/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir deleted file mode 100644 index 8e27de4b60de7..0000000000000 --- a/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir +++ /dev/null @@ -1,25 +0,0 @@ -// RUN: mlir-opt %s --test-gpu-to-hsaco | FileCheck %s - -// CHECK: gpu.module @foo attributes {gpu.binary = "HSACO"} -gpu.module @foo { - llvm.func @kernel(%arg0 : f32, %arg1 : !llvm.ptr) - // CHECK: attributes {gpu.kernel} - attributes { gpu.kernel } { - llvm.return - } -} - -// CHECK: gpu.module @bar attributes {gpu.binary = "HSACO"} -gpu.module @bar { - // CHECK: func @kernel_a - llvm.func @kernel_a() - attributes { gpu.kernel } { - llvm.return - } - - // CHECK: func @kernel_b - llvm.func @kernel_b() - attributes { gpu.kernel } { - llvm.return - } -} diff --git a/mlir/test/lib/Dialect/GPU/CMakeLists.txt b/mlir/test/lib/Dialect/GPU/CMakeLists.txt index ac96229e80a07..80edd04b691a5 100644 --- a/mlir/test/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/test/lib/Dialect/GPU/CMakeLists.txt @@ -31,8 +31,6 @@ set(LIBS ) add_mlir_library(MLIRGPUTestPasses - TestConvertGPUKernelToCubin.cpp - TestConvertGPUKernelToHsaco.cpp TestGpuMemoryPromotion.cpp TestGpuRewrite.cpp TestLowerToNVVM.cpp @@ -43,12 +41,3 @@ add_mlir_library(MLIRGPUTestPasses ${LIBS} ) -# This is how it is defined in 
mlir/lib/Dialect/GPU/CMakeLists.txt -# We probably want something better project-wide -if(MLIR_ENABLE_CUDA_RUNNER) - # Enable gpu-to-cubin pass. - target_compile_definitions(MLIRGPUTestPasses - PRIVATE - MLIR_GPU_TO_CUBIN_PASS_ENABLE=1 - ) -endif() diff --git a/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToCubin.cpp b/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToCubin.cpp deleted file mode 100644 index 1c442b0147c8b..0000000000000 --- a/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToCubin.cpp +++ /dev/null @@ -1,73 +0,0 @@ -//===- TestConvertGPUKernelToCubin.cpp - Test gpu kernel cubin lowering ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/GPU/Transforms/Passes.h" -#include "mlir/Pass/Pass.h" -#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h" -#include "mlir/Target/LLVMIR/Export.h" -#include "llvm/Support/TargetSelect.h" - -using namespace mlir; - -#if MLIR_CUDA_CONVERSIONS_ENABLED -namespace { -class TestSerializeToCubinPass - : public PassWrapper { -public: - MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestSerializeToCubinPass) - - StringRef getArgument() const final { return "test-gpu-to-cubin"; } - StringRef getDescription() const final { - return "Lower GPU kernel function to CUBIN binary annotations"; - } - TestSerializeToCubinPass(); - -private: - void getDependentDialects(DialectRegistry ®istry) const override; - - // Serializes PTX to CUBIN. 
- std::unique_ptr> - serializeISA(const std::string &isa) override; -}; -} // namespace - -TestSerializeToCubinPass::TestSerializeToCubinPass() { - this->triple = "nvptx64-nvidia-cuda"; - this->chip = "sm_35"; - this->features = "+ptx60"; -} - -void TestSerializeToCubinPass::getDependentDialects( - DialectRegistry ®istry) const { - registerNVVMDialectTranslation(registry); - gpu::SerializeToBlobPass::getDependentDialects(registry); -} - -std::unique_ptr> -TestSerializeToCubinPass::serializeISA(const std::string &) { - std::string data = "CUBIN"; - return std::make_unique>(data.begin(), data.end()); -} - -namespace mlir { -namespace test { -// Register test pass to serialize GPU module to a CUBIN binary annotation. -void registerTestGpuSerializeToCubinPass() { - PassRegistration([] { - // Initialize LLVM NVPTX backend. - LLVMInitializeNVPTXTarget(); - LLVMInitializeNVPTXTargetInfo(); - LLVMInitializeNVPTXTargetMC(); - LLVMInitializeNVPTXAsmPrinter(); - - return std::make_unique(); - }); -} -} // namespace test -} // namespace mlir -#endif // MLIR_CUDA_CONVERSIONS_ENABLED diff --git a/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToHsaco.cpp b/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToHsaco.cpp deleted file mode 100644 index c204e86632ac9..0000000000000 --- a/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToHsaco.cpp +++ /dev/null @@ -1,72 +0,0 @@ -//===- TestConvertGPUKernelToHsaco.cpp - Test gpu kernel hsaco lowering ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/GPU/Transforms/Passes.h" -#include "mlir/Pass/Pass.h" -#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h" -#include "mlir/Target/LLVMIR/Export.h" -#include "llvm/Support/TargetSelect.h" - -using namespace mlir; - -#if MLIR_ROCM_CONVERSIONS_ENABLED -namespace { -class TestSerializeToHsacoPass - : public PassWrapper { -public: - MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestSerializeToHsacoPass) - - StringRef getArgument() const final { return "test-gpu-to-hsaco"; } - StringRef getDescription() const final { - return "Lower GPU kernel function to HSAco binary annotations"; - } - TestSerializeToHsacoPass(); - -private: - void getDependentDialects(DialectRegistry ®istry) const override; - - // Serializes ROCDL IR to HSACO. - std::unique_ptr> - serializeISA(const std::string &isa) override; -}; -} // namespace - -TestSerializeToHsacoPass::TestSerializeToHsacoPass() { - this->triple = "amdgcn-amd-amdhsa"; - this->chip = "gfx900"; -} - -void TestSerializeToHsacoPass::getDependentDialects( - DialectRegistry ®istry) const { - registerROCDLDialectTranslation(registry); - gpu::SerializeToBlobPass::getDependentDialects(registry); -} - -std::unique_ptr> -TestSerializeToHsacoPass::serializeISA(const std::string &) { - std::string data = "HSACO"; - return std::make_unique>(data.begin(), data.end()); -} - -namespace mlir { -namespace test { -// Register test pass to serialize GPU module to a HSAco binary annotation. -void registerTestGpuSerializeToHsacoPass() { - PassRegistration([] { - // Initialize LLVM AMDGPU backend. 
- LLVMInitializeAMDGPUTarget(); - LLVMInitializeAMDGPUTargetInfo(); - LLVMInitializeAMDGPUTargetMC(); - LLVMInitializeAMDGPUAsmPrinter(); - - return std::make_unique(); - }); -} -} // namespace test -} // namespace mlir -#endif // MLIR_ROCM_CONVERSIONS_ENABLED diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index a8aeffec1ae72..22eca9bcff6ff 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -80,8 +80,6 @@ void registerTestCallGraphPass(); void registerTestCfAssertPass(); void registerTestConstantFold(); void registerTestControlFlowSink(); -void registerTestGpuSerializeToCubinPass(); -void registerTestGpuSerializeToHsacoPass(); void registerTestDataLayoutPropagation(); void registerTestDataLayoutQuery(); void registerTestDeadCodeAnalysisPass(); @@ -204,11 +202,7 @@ void registerTestPasses() { mlir::test::registerTestDiagnosticsPass(); mlir::test::registerTestDialectConversionPasses(); #if MLIR_CUDA_CONVERSIONS_ENABLED - mlir::test::registerTestGpuSerializeToCubinPass(); mlir::test::registerTestLowerToNVVM(); -#endif -#if MLIR_ROCM_CONVERSIONS_ENABLED - mlir::test::registerTestGpuSerializeToHsacoPass(); #endif mlir::test::registerTestDecomposeCallGraphTypes(); mlir::test::registerTestDataLayoutPropagation(); From 2c074e2327c2ca41a0627e3e7fd997f4f4d71506 Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Mon, 11 Sep 2023 15:05:49 +0000 Subject: [PATCH 2/2] Add the MLIR_ENABLE_DEPRECATED_GPU_SERIALIZATION option to enable the deprecated passes in mlir-opt --- mlir/CMakeLists.txt | 1 + mlir/tools/mlir-opt/CMakeLists.txt | 8 ++++++++ mlir/tools/mlir-opt/mlir-opt.cpp | 4 ++++ 3 files changed, 13 insertions(+) diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt index fa4f6e76f985f..bbbcb0703f20f 100644 --- a/mlir/CMakeLists.txt +++ b/mlir/CMakeLists.txt @@ -114,6 +114,7 @@ else() endif() add_definitions(-DMLIR_ROCM_CONVERSIONS_ENABLED=${MLIR_ENABLE_ROCM_CONVERSIONS}) 
+set(MLIR_ENABLE_DEPRECATED_GPU_SERIALIZATION 0 CACHE BOOL "Enable deprecated GPU serialization passes") set(MLIR_ENABLE_CUDA_RUNNER 0 CACHE BOOL "Enable building the mlir CUDA runner") set(MLIR_ENABLE_ROCM_RUNNER 0 CACHE BOOL "Enable building the mlir ROCm runner") set(MLIR_ENABLE_SPIRV_CPU_RUNNER 0 CACHE BOOL "Enable building the mlir SPIR-V cpu runner") diff --git a/mlir/tools/mlir-opt/CMakeLists.txt b/mlir/tools/mlir-opt/CMakeLists.txt index df1e9480c754b..88a0562cb6e72 100644 --- a/mlir/tools/mlir-opt/CMakeLists.txt +++ b/mlir/tools/mlir-opt/CMakeLists.txt @@ -91,3 +91,11 @@ llvm_update_compile_flags(mlir-opt) mlir_check_all_link_libraries(mlir-opt) export_executable_symbols_for_plugins(mlir-opt) + +if(MLIR_ENABLE_DEPRECATED_GPU_SERIALIZATION) + # Enable deprecated serialization passes. + target_compile_definitions(mlir-opt + PRIVATE + MLIR_DEPRECATED_GPU_SERIALIZATION_ENABLE=1 + ) +endif() diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index 22eca9bcff6ff..b7647d7de78a1 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -264,6 +264,10 @@ void registerTestPasses() { int main(int argc, char **argv) { registerAllPasses(); +#if MLIR_DEPRECATED_GPU_SERIALIZATION_ENABLE == 1 + registerGpuSerializeToCubinPass(); + registerGpuSerializeToHsacoPass(); +#endif #ifdef MLIR_INCLUDE_TESTS registerTestPasses(); #endif