Revert "[mlir][test][gpu] Migrate CUDA tests to the TargetAttr compilation workflow (#65768) #65848
Merged
Conversation
Revert "[mlir][test][gpu] Migrate CUDA tests to the TargetAttr compilation workflow (llvm#65768)"
This reverts commit d21b672.
@llvm/pr-subscribers-mlir

Changes

This reverts commit d21b672.

Patch is 27.61 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/65848.diff

25 Files Affected:
diff --git a/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h b/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h
index e0f4c71051e506a..46f29c6dd8b9267 100644
--- a/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h
+++ b/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h
@@ -16,7 +16,9 @@ namespace mlir {
 class LLVMTypeConverter;
 class ConversionTarget;
 class RewritePatternSet;
-class Pass;
+
+template <typename OpT>
+class OperationPass;
 
 namespace gpu {
 class GPUModuleOp;
@@ -43,6 +45,14 @@ void populateGpuSubgroupReduceOpLoweringPattern(LLVMTypeConverter &converter,
 /// Collect a set of patterns to convert WMMA ops from GPU dialect to NVVM.
 void populateGpuWMMAToNVVMConversionPatterns(LLVMTypeConverter &converter,
                                              RewritePatternSet &patterns);
+
+/// Creates a pass that lowers GPU dialect operations to NVVM counterparts. The
+/// index bitwidth used for the lowering of the device side index computations
+/// is configurable.
+std::unique_ptr<OperationPass<gpu::GPUModuleOp>> createLowerGpuOpsToNVVMOpsPass(
+    unsigned indexBitwidth = kDeriveIndexBitwidthFromDataLayout,
+    bool hasRedux = false);
+
 } // namespace mlir
 
 #endif // MLIR_CONVERSION_GPUTONVVM_GPUTONVVMPASS_H_
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index 3218760931b8cb0..ed37abf85275bf3 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -486,6 +486,7 @@ def LowerHostCodeToLLVMPass : Pass<"lower-host-to-llvm", "ModuleOp"> {
 
 def ConvertGpuOpsToNVVMOps : Pass<"convert-gpu-to-nvvm", "gpu::GPUModuleOp"> {
   let summary = "Generate NVVM operations for gpu operations";
+  let constructor = "mlir::createLowerGpuOpsToNVVMOpsPass()";
   let dependentDialects = [
     "cf::ControlFlowDialect",
     "memref::MemRefDialect",
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index 764b6a779b98c7a..06469dc82b3fc5d 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -210,7 +210,11 @@ struct GPULaneIdOpToNVVM : ConvertOpToLLVMPattern<gpu::LaneIdOp> {
 /// code.
 struct LowerGpuOpsToNVVMOpsPass
     : public impl::ConvertGpuOpsToNVVMOpsBase<LowerGpuOpsToNVVMOpsPass> {
-  using Base::Base;
+  LowerGpuOpsToNVVMOpsPass() = default;
+  LowerGpuOpsToNVVMOpsPass(unsigned indexBitwidth, bool hasRedux = false) {
+    this->indexBitwidth = indexBitwidth;
+    this->hasRedux = hasRedux;
+  }
 
   void runOnOperation() override {
     gpu::GPUModuleOp m = getOperation();
@@ -374,3 +378,8 @@ void mlir::populateGpuToNVVMConversionPatterns(LLVMTypeConverter &converter,
                                     "__nv_tanh");
   populateOpPatterns<math::TanOp>(converter, patterns, "__nv_tanf", "__nv_tan");
 }
+
+std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
+mlir::createLowerGpuOpsToNVVMOpsPass(unsigned indexBitwidth, bool hasRedux) {
+  return std::make_unique<LowerGpuOpsToNVVMOpsPass>(indexBitwidth, hasRedux);
+}
diff --git a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
index 24c4c4c43a93dea..a7fd5a25e68314e 100644
--- a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
@@ -52,7 +52,7 @@ void mlir::sparse_tensor::buildSparseCompiler(
     pm.addPass(createSparseGPUCodegenPass());
     pm.addNestedPass<gpu::GPUModuleOp>(createStripDebugInfoPass());
     pm.addNestedPass<gpu::GPUModuleOp>(createConvertSCFToCFPass());
-    pm.addNestedPass<gpu::GPUModuleOp>(createConvertGpuOpsToNVVMOps());
+    pm.addNestedPass<gpu::GPUModuleOp>(createLowerGpuOpsToNVVMOpsPass());
   }
 
   // TODO(springerm): Add sparse support to the BufferDeallocation pass and add
diff --git a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/dump-ptx.mlir b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/dump-ptx.mlir
index 0cb06b7bf1d2001..2c1ae3ee840d0f8 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/dump-ptx.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/dump-ptx.mlir
@@ -1,5 +1,6 @@
 // RUN: mlir-opt %s \
-// RUN: | mlir-opt -test-lower-to-nvvm -debug-only=serialize-to-isa \
+// RUN: | mlir-opt -gpu-kernel-outlining \
+// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{dump-ptx}))' \
 // RUN: 2>&1 | FileCheck %s
 
 // CHECK: Generated by LLVM NVPTX Back-End
diff --git a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir
index 80972f244ec02d7..8eb90fd3ca9946e 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir
@@ -2,9 +2,10 @@
 // NOTE: this test requires gpu-sm80
 //
 // RUN: mlir-opt \
-// RUN: --pass-pipeline="builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,convert-nvgpu-to-nvvm,affine-expand-index-ops,lower-affine,convert-arith-to-llvm),convert-vector-to-llvm,canonicalize,cse)" \
+// RUN: --pass-pipeline="builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,convert-nvgpu-to-nvvm,affine-expand-index-ops,lower-affine,convert-arith-to-llvm),convert-vector-to-llvm,canonicalize,cse,gpu.module(gpu-to-cubin{chip=sm_80 features=+ptx71}))" \
 // RUN: %s \
-// RUN: | mlir-opt --test-lower-to-nvvm="cubin-chip=sm_80 cubin-features=+ptx71" \
+// RUN: | mlir-opt --convert-vector-to-scf --convert-scf-to-cf -convert-cf-to-llvm --convert-vector-to-llvm \
+// RUN: --convert-arith-to-llvm --gpu-to-llvm --reconcile-unrealized-casts \
 // RUN: | mlir-cpu-runner \
 // RUN: --shared-libs=%mlir_cuda_runtime \
 // RUN: --shared-libs=%mlir_c_runner_utils \
diff --git a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir
index 8c991493a2b0174..8571c5ca5f3dc26 100644
--- a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir
+++ b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir
@@ -1,7 +1,9 @@
 // RUN: mlir-opt %s -test-vector-warp-distribute="hoist-uniform distribute-transfer-write propagate-distribution" -canonicalize |\
 // RUN: mlir-opt -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if |\
 // RUN: mlir-opt -lower-affine -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm \
-// RUN: -convert-arith-to-llvm -test-lower-to-nvvm | \
+// RUN: -convert-arith-to-llvm -gpu-kernel-outlining |\
+// RUN: mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin))' |\
+// RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN: -shared-libs=%mlir_cuda_runtime \
 // RUN: -shared-libs=%mlir_c_runner_utils \
diff --git a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir
index f26c18c4ae3dd28..c671c1843862f94 100644
--- a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir
+++ b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir
@@ -2,7 +2,9 @@
 // everything on the same thread.
 // RUN: mlir-opt %s -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \
 // RUN: mlir-opt -convert-vector-to-scf -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \
-// RUN: -test-lower-to-nvvm | \
+// RUN: -gpu-kernel-outlining |\
+// RUN: mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin))' |\
+// RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN: -shared-libs=%mlir_cuda_runtime \
 // RUN: -shared-libs=%mlir_c_runner_utils \
@@ -13,7 +15,9 @@
 // RUN: mlir-opt %s -test-vector-warp-distribute="hoist-uniform distribute-transfer-write" \
 // RUN: -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \
 // RUN: mlir-opt -convert-vector-to-scf -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \
-// RUN: -test-lower-to-nvvm | \
+// RUN: -gpu-kernel-outlining |\
+// RUN: mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin))' |\
+// RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN: -shared-libs=%mlir_cuda_runtime \
 // RUN: -shared-libs=%mlir_c_runner_utils \
@@ -23,7 +27,9 @@
 // RUN: mlir-opt %s -test-vector-warp-distribute="hoist-uniform distribute-transfer-write propagate-distribution" \
 // RUN: -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \
 // RUN: mlir-opt -convert-vector-to-scf -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \
-// RUN: -test-lower-to-nvvm | \
+// RUN: -gpu-kernel-outlining |\
+// RUN: mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin))' |\
+// RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN: -shared-libs=%mlir_cuda_runtime \
 // RUN: -shared-libs=%mlir_c_runner_utils \
diff --git a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir
index 591bf1b4fd18231..535ba52d66f0062 100644
--- a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir
+++ b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir
@@ -1,5 +1,7 @@
 // RUN: mlir-opt %s \
-// RUN: | mlir-opt -test-lower-to-nvvm="cubin-chip=sm_70" \
+// RUN: | mlir-opt -gpu-kernel-outlining \
+// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70}))' \
+// RUN: | mlir-opt --convert-scf-to-cf -gpu-to-llvm \
 // RUN: | mlir-cpu-runner \
 // RUN: --shared-libs=%mlir_cuda_runtime \
 // RUN: --shared-libs=%mlir_runner_utils \
diff --git a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32-bare-ptr.mlir b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32-bare-ptr.mlir
index 51bd23f817b33f1..c4ca46521eeb4c7 100644
--- a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32-bare-ptr.mlir
+++ b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32-bare-ptr.mlir
@@ -3,7 +3,9 @@
 // Similar to the wmma-matmul-f32 but but with the memref bare pointer lowering convention.
 // This test also uses gpu.memcpy operations (instead of gpu.host_register).
 // RUN: mlir-opt %s \
-// RUN: | mlir-opt -test-lower-to-nvvm="host-bare-ptr-calling-convention=1 kernel-bare-ptr-calling-convention=1 cubin-chip=sm_70" \
+// RUN: | mlir-opt -gpu-kernel-outlining \
+// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm{use-bare-ptr-memref-call-conv=1},gpu-to-cubin{chip=sm_70}))' \
+// RUN: | mlir-opt --convert-scf-to-cf -gpu-to-llvm="use-bare-pointers-for-host=1 use-bare-pointers-for-kernels=1" \
 // RUN: | mlir-cpu-runner \
 // RUN: --shared-libs=%mlir_cuda_runtime \
 // RUN: --entry-point-result=void \
diff --git a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir
index 0307b3d504be9f6..ae410dce281b17a 100644
--- a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir
+++ b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir
@@ -1,5 +1,7 @@
 // RUN: mlir-opt %s \
-// RUN: | mlir-opt -test-lower-to-nvvm="cubin-chip=sm_70" \
+// RUN: | mlir-opt -gpu-kernel-outlining \
+// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70}))' \
+// RUN: | mlir-opt --convert-scf-to-cf -gpu-to-llvm \
 // RUN: | mlir-cpu-runner \
 // RUN: --shared-libs=%mlir_cuda_runtime \
 // RUN: --shared-libs=%mlir_runner_utils \
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir
index b131b8682ddee06..f4324a14a36b64a 100644
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir
@@ -1,5 +1,7 @@
 // RUN: mlir-opt %s \
-// RUN: | mlir-opt -test-lower-to-nvvm \
+// RUN: | mlir-opt -gpu-kernel-outlining \
+// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \
+// RUN: | mlir-opt -gpu-to-llvm \
 // RUN: | mlir-cpu-runner \
 // RUN: --shared-libs=%mlir_cuda_runtime \
 // RUN: --shared-libs=%mlir_runner_utils \
@@ -8,7 +10,9 @@
 // Same as above but with the memref bare pointer lowering convention.
 // RUN: mlir-opt %s \
-// RUN: | mlir-opt -test-lower-to-nvvm="kernel-bare-ptr-calling-convention=1" \
+// RUN: | mlir-opt -gpu-kernel-outlining \
+// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm{use-bare-ptr-memref-call-conv=1},gpu-to-cubin))' \
+// RUN: | mlir-opt -gpu-to-llvm="use-bare-pointers-for-kernels=1" \
 // RUN: | mlir-cpu-runner \
 // RUN: --shared-libs=%mlir_cuda_runtime \
 // RUN: --shared-libs=%mlir_runner_utils \
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir
index 155423db7e05049..0a8d38f14527914 100644
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir
@@ -1,5 +1,7 @@
 // RUN: mlir-opt %s \
-// RUN: | mlir-opt -test-lower-to-nvvm \
+// RUN: | mlir-opt -gpu-kernel-outlining \
+// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \
+// RUN: | mlir-opt -gpu-to-llvm \
 // RUN: | mlir-cpu-runner \
 // RUN: --shared-libs=%mlir_cuda_runtime \
 // RUN: --shared-libs=%mlir_runner_utils \
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir
index e5047b6efa3bf25..bcd785d35291c66 100644
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir
@@ -1,5 +1,7 @@
 // RUN: mlir-opt %s \
-// RUN: | mlir-opt -test-lower-to-nvvm \
+// RUN: | mlir-opt -gpu-kernel-outlining \
+// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \
+// RUN: | mlir-opt -gpu-to-llvm \
 // RUN: | mlir-cpu-runner \
 // RUN: --shared-libs=%mlir_cuda_runtime \
 // RUN: --shared-libs=%mlir_runner_utils \
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir
index 163e9fdba60c1a9..aa4b0e8820479dc 100644
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir
@@ -1,5 +1,7 @@
 // RUN: mlir-opt %s \
-// RUN: | mlir-opt -test-lower-to-nvvm \
+// RUN: | mlir-opt -gpu-kernel-outlining \
+// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \
+// RUN: | mlir-opt -gpu-to-llvm \
 // RUN: | mlir-cpu-runner \
 // RUN: --shared-libs=%mlir_cuda_runtime \
 // RUN: --shared-libs=%mlir_runner_utils \
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir
index 381db2639c371f3..2e7d046c3921411 100644
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir
@@ -1,5 +1,7 @@
 // RUN: mlir-opt %s \
-// RUN: | mlir-opt -test-lower-to-nvvm \
+// RUN: | mlir-opt -gpu-kernel-outlining \
+// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \
+// RUN: | mlir-opt -gpu-to-llvm \
 // RUN: | mlir-cpu-runner \
 // RUN: --shared-libs=%mlir_cuda_runtime \
 // RUN: --shared-libs=%mlir_runner_utils \
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir
index 23c6c117e67f36b..32cfa27c8988a2c 100644
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir
@@ -1,5 +1,7 @@
 // RUN: mlir-opt %s \
-// RUN: | mlir-opt -test-lower-to-nvvm \
+// RUN: | mlir-opt -gpu-kernel-outlining \
+// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \
+// RUN: | mlir-opt -gpu-to-llvm \
 // RUN: | mlir-cpu-runner \
 // RUN: --shared-libs=%mlir_cuda_runtime \
 // RUN: --shared-libs=%mlir_runner_utils \
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir
index 3c5a100b5b90d57..30767b9495b6f20 100644
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir
@@ -1,5 +1,7 @@
 // RUN: mlir-opt %s \
-// RUN: | mlir-opt -test-lower-to-nvvm \
+// RUN: | mlir-opt -gpu-kernel-outlining \
+// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \
+// RUN: | mlir-opt -gpu-to-llvm \
 // RUN: | mlir-cpu-runner \
 // RUN: --shared-libs=%mlir_cuda_runtime \
 // RUN: --shared-libs=%mlir_runner_utils \
diff --git a/mlir/test/Integration/GPU/CUDA/async.mlir b/mlir/test/Integration/GPU/CUDA/async.mlir
index d2a5127a34c3bdd..e6dd91ace974322 100644
--- a/mlir/test/Integration/GPU/CUDA/async.mlir
+++ b/mlir/test/Integration/GPU/CUDA/async.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s \
 // RUN: | mlir-opt -gpu-kernel-outlining \
-// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)' \
-// RUN: | mlir-opt -gpu-async-region -gpu-to-llvm -gpu-module-to-binary \
+// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \
+// RUN: | mlir-opt -gpu-async-region -gpu-to-llvm \
 // RUN: | mlir-opt -async-to-async-runtime -async-runtime-ref-counting \
 // RUN: | ...
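For context, the revert restores the C++ factory function createLowerGpuOpsToNVVMOpsPass alongside the TableGen-registered convert-gpu-to-nvvm pass, so pipelines can construct the lowering programmatically instead of going through the TargetAttr workflow. Below is a minimal sketch of how a pipeline might register it, mirroring the SparseTensorPipelines.cpp hunk above; the helper function name and the commented option values are illustrative, not part of this patch.

#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Pass/PassManager.h"

using namespace mlir;

// Hypothetical helper: nest the restored GPU-to-NVVM lowering inside each
// gpu.module, the same way SparseTensorPipelines.cpp does after this revert.
static void buildExampleGpuLoweringPipeline(OpPassManager &pm) {
  // Default configuration: index bitwidth derived from the data layout,
  // redux lowering disabled.
  pm.addNestedPass<gpu::GPUModuleOp>(createLowerGpuOpsToNVVMOpsPass());

  // Options can also be passed explicitly (illustrative values):
  // pm.addNestedPass<gpu::GPUModuleOp>(
  //     createLowerGpuOpsToNVVMOpsPass(/*indexBitwidth=*/32, /*hasRedux=*/true));
}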
This reverts commit d21b672.