From 341b93dc9407fc76303db9c68dbdc8db4ce70e29 Mon Sep 17 00:00:00 2001 From: Wayne Franz Date: Fri, 21 Feb 2025 17:16:52 -0500 Subject: [PATCH 1/5] Test and benchmark updates for new hipGetLastError behaviour Future HIP versions will change the behaviour of hipGetLastError slightly. Currently, the function returns any error that occured in the last HIP API call in the current host thread. In other words, the error it reports is cleared with each HIP API call. In the future, the function will return any error that occurred in any HIP API call in the current host thread, since the last time that hipGetLastError was called. In other words, the error it reports will be cleared only on a call to hipGetLastError. A number of rocPRIM tests and benchmarks currently rely on the old behaviour of hipGetLastError. In order to make sure that they continue to work with the future changes, we need to call hipGetLastError before the test/benchmark code is run, so that any previous errors that may have occurred (eg. a call to hipMalloc that failed due to insufficient memory - which happens on some architectures for large test input sizes) get cleared before the test/benchmark calls hipGetLastError. This change: - modifies the HIP_CHECK macro so that it clears hipGetLastError before and after the HIP API call it wraps. It now checks for two types of errors: error returned from the wrapped function call, and errors reported by hipGetLastError after the wrapped call completes. - adds a HIP_CHECK_LAUNCH macro that can be used to wrap kernel calls. It clears any internally recorded HIP error before and after the kernel is invoked. Tests will fail if the hipGetLastError call invoked after the kernel returns an error code. - modifies the HIP_CHECK_MEMORY macro to clear an existing error before the memory allocation call it wraps. If the memory allocation call returns hipErrorOutOfMemory, then hipGetLastError is called again (afterwards) to clear the error. - modifies a few test files so that they use hipLaunchKernelGGL instead of the triple chevron syntax for launching kernels. The triple chevron syntax cannot be wrapped in a call to the HIP_LAUNCH_KERNEL marco. --- .../benchmark_block_adjacent_difference.cpp | 42 +- benchmark/benchmark_block_discontinuity.cpp | 20 +- benchmark/benchmark_block_exchange.cpp | 20 +- benchmark/benchmark_block_histogram.cpp | 20 +- benchmark/benchmark_block_radix_sort.cpp | 21 +- benchmark/benchmark_block_reduce.cpp | 18 +- benchmark/benchmark_block_scan.cpp | 18 +- benchmark/benchmark_block_sort.parallel.hpp | 3 +- benchmark/benchmark_config_dispatch.cpp | 3 +- ...enchmark_device_adjacent_find.parallel.hpp | 3 - benchmark/benchmark_warp_reduce.cpp | 38 +- benchmark/benchmark_warp_sort.cpp | 45 +- common/utils.hpp | 84 +++- test/common_test_header.hpp | 13 +- test/hip/test_hip_api.cpp | 22 +- test/hip/test_hip_async_copy.cpp | 8 +- test/hip/test_ordered_block_id.cpp | 12 +- test/rocprim/detail/get_rocprim_version.cpp | 13 +- .../internal/test_internal_merge_path.cpp | 15 +- test/rocprim/test_arg_index_iterator.cpp | 1 - ...test_block_adjacent_difference.kernels.hpp | 191 ++++---- .../test_block_discontinuity.kernels.hpp | 75 ++-- test/rocprim/test_block_exchange.kernels.hpp | 142 +++--- test/rocprim/test_block_histogram.kernels.hpp | 22 +- test/rocprim/test_block_load_store.hpp | 80 ++-- test/rocprim/test_block_radix_rank.hpp | 28 +- .../rocprim/test_block_radix_sort.kernels.hpp | 28 +- test/rocprim/test_block_reduce.kernels.hpp | 44 +- test/rocprim/test_block_run_length_decode.cpp | 23 +- test/rocprim/test_block_scan.kernels.hpp | 187 ++++---- test/rocprim/test_block_shuffle.hpp | 76 ++-- test/rocprim/test_block_sort.hpp | 30 +- test/rocprim/test_config_dispatch.cpp | 12 +- test/rocprim/test_constant_iterator.cpp | 1 - test/rocprim/test_counting_iterator.cpp | 1 - .../test_device_adjacent_difference.cpp | 1 - test/rocprim/test_device_adjacent_find.cpp | 1 - test/rocprim/test_device_find_end.cpp | 2 - test/rocprim/test_device_find_first_of.cpp | 2 - test/rocprim/test_device_merge.cpp | 2 - test/rocprim/test_device_merge_sort.cpp | 2 - test/rocprim/test_device_nth_element.cpp | 3 - test/rocprim/test_device_partial_sort.cpp | 4 - test/rocprim/test_device_reduce.cpp | 4 - test/rocprim/test_device_scan.cpp | 10 - test/rocprim/test_device_search.cpp | 2 - test/rocprim/test_device_search_n.cpp | 12 - test/rocprim/test_device_segmented_reduce.cpp | 1 - test/rocprim/test_device_transform.cpp | 3 - test/rocprim/test_intrinsics.cpp | 189 ++++---- .../rocprim/test_lookback_reproducibility.cpp | 3 - test/rocprim/test_texture_cache_iterator.cpp | 1 - test/rocprim/test_thread.cpp | 36 +- test/rocprim/test_thread_algos.cpp | 106 +++-- test/rocprim/test_transform_iterator.cpp | 2 +- test/rocprim/test_warp_exchange.cpp | 47 +- test/rocprim/test_warp_load.cpp | 35 +- test/rocprim/test_warp_reduce.hpp | 313 +++++++------ test/rocprim/test_warp_scan.hpp | 411 ++++++++++-------- test/rocprim/test_warp_sort.hpp | 48 +- test/rocprim/test_warp_store.cpp | 31 +- 61 files changed, 1465 insertions(+), 1165 deletions(-) diff --git a/benchmark/benchmark_block_adjacent_difference.cpp b/benchmark/benchmark_block_adjacent_difference.cpp index d9f7728b4..380647757 100644 --- a/benchmark/benchmark_block_adjacent_difference.cpp +++ b/benchmark/benchmark_block_adjacent_difference.cpp @@ -282,15 +282,17 @@ auto run_benchmark(benchmark::State& state, // Record start event HIP_CHECK(hipEventRecord(start, stream)); - hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel), - dim3(num_blocks), - dim3(BlockSize), - 0, - stream, - d_input, - d_output, - Trials); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel), + dim3(num_blocks), + dim3(BlockSize), + 0, + stream, + d_input, + d_output, + Trials + ) + ); // Record stop event and wait until it completes HIP_CHECK(hipEventRecord(stop, stream)); @@ -368,16 +370,18 @@ auto run_benchmark(benchmark::State& state, // Record start event HIP_CHECK(hipEventRecord(start, stream)); - hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel), - dim3(num_blocks), - dim3(BlockSize), - 0, - stream, - d_input, - d_tile_sizes, - d_output, - Trials); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel), + dim3(num_blocks), + dim3(BlockSize), + 0, + stream, + d_input, + d_tile_sizes, + d_output, + Trials + ) + ); // Record stop event and wait until it completes HIP_CHECK(hipEventRecord(stop, stream)); diff --git a/benchmark/benchmark_block_discontinuity.cpp b/benchmark/benchmark_block_discontinuity.cpp index 010def843..bf90a6733 100644 --- a/benchmark/benchmark_block_discontinuity.cpp +++ b/benchmark/benchmark_block_discontinuity.cpp @@ -230,15 +230,17 @@ void run_benchmark(benchmark::State& state, // Record start event HIP_CHECK(hipEventRecord(start, stream)); - hipLaunchKernelGGL( - HIP_KERNEL_NAME(kernel), - dim3(size / items_per_block), - dim3(BlockSize), - 0, - stream, - d_input, - d_output); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(kernel), + dim3(size / items_per_block), + dim3(BlockSize), + 0, + stream, + d_input, + d_output + ) + ); // Record stop event and wait until it completes HIP_CHECK(hipEventRecord(stop, stream)); diff --git a/benchmark/benchmark_block_exchange.cpp b/benchmark/benchmark_block_exchange.cpp index 6a5e211a7..477e29e3d 100644 --- a/benchmark/benchmark_block_exchange.cpp +++ b/benchmark/benchmark_block_exchange.cpp @@ -261,15 +261,17 @@ void run_benchmark(benchmark::State& state, // Record start event HIP_CHECK(hipEventRecord(start, stream)); - hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel), - dim3(size / items_per_block), - dim3(BlockSize), - 0, - stream, - d_input, - d_ranks, - d_output); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel), + dim3(size / items_per_block), + dim3(BlockSize), + 0, + stream, + d_input, + d_ranks, + d_output + ) + ); // Record stop event and wait until it completes HIP_CHECK(hipEventRecord(stop, stream)); diff --git a/benchmark/benchmark_block_histogram.cpp b/benchmark/benchmark_block_histogram.cpp index cf454b60c..6e103cc65 100644 --- a/benchmark/benchmark_block_histogram.cpp +++ b/benchmark/benchmark_block_histogram.cpp @@ -137,15 +137,17 @@ void run_benchmark(benchmark::State& state, hipStream_t stream, size_t bytes) // Record start event HIP_CHECK(hipEventRecord(start, stream)); - hipLaunchKernelGGL( - HIP_KERNEL_NAME(kernel), - dim3(size / items_per_block), - dim3(BlockSize), - 0, - stream, - d_input, - d_output); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(kernel), + dim3(size / items_per_block), + dim3(BlockSize), + 0, + stream, + d_input, + d_output + ) + ); // Record stop event and wait until it completes HIP_CHECK(hipEventRecord(stop, stream)); diff --git a/benchmark/benchmark_block_radix_sort.cpp b/benchmark/benchmark_block_radix_sort.cpp index a8f1122d2..34912aedb 100644 --- a/benchmark/benchmark_block_radix_sort.cpp +++ b/benchmark/benchmark_block_radix_sort.cpp @@ -178,17 +178,18 @@ void run_benchmark(benchmark::State& state, } else if(benchmark_kind == benchmark_kinds::sort_pairs) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - sort_pairs_kernel), - dim3(size / items_per_block), - dim3(BlockSize), - 0, - stream, - d_input, - d_output); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + sort_pairs_kernel), + dim3(size / items_per_block), + dim3(BlockSize), + 0, + stream, + d_input, + d_output) + ); } - HIP_CHECK(hipGetLastError()); // Record stop event and wait until it completes HIP_CHECK(hipEventRecord(stop, stream)); diff --git a/benchmark/benchmark_block_reduce.cpp b/benchmark/benchmark_block_reduce.cpp index 497c3cced..e4a3794d8 100644 --- a/benchmark/benchmark_block_reduce.cpp +++ b/benchmark/benchmark_block_reduce.cpp @@ -121,14 +121,16 @@ void run_benchmark(benchmark::State& state, hipStream_t stream, size_t bytes) // Record start event HIP_CHECK(hipEventRecord(start, stream)); - hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel), - dim3(size / items_per_block), - dim3(BlockSize), - 0, - stream, - d_input, - d_output); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel), + dim3(size / items_per_block), + dim3(BlockSize), + 0, + stream, + d_input, + d_output + ) + ); // Record stop event and wait until it completes HIP_CHECK(hipEventRecord(stop, stream)); diff --git a/benchmark/benchmark_block_scan.cpp b/benchmark/benchmark_block_scan.cpp index 0e4e3b7f0..f76231976 100644 --- a/benchmark/benchmark_block_scan.cpp +++ b/benchmark/benchmark_block_scan.cpp @@ -154,14 +154,16 @@ void run_benchmark(benchmark::State& state, hipStream_t stream, size_t bytes) // Record start event HIP_CHECK(hipEventRecord(start, stream)); - hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel), - dim3(size / items_per_block), - dim3(BlockSize), - 0, - stream, - d_input, - d_output); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel), + dim3(size / items_per_block), + dim3(BlockSize), + 0, + stream, + d_input, + d_output + ) + ); // Record stop event and wait until it completes HIP_CHECK(hipEventRecord(stop, stream)); diff --git a/benchmark/benchmark_block_sort.parallel.hpp b/benchmark/benchmark_block_sort.parallel.hpp index c00c11a40..0d0307945 100644 --- a/benchmark/benchmark_block_sort.parallel.hpp +++ b/benchmark/benchmark_block_sort.parallel.hpp @@ -273,9 +273,8 @@ struct block_sort_benchmark : public config_autotune_interface for(size_t i = 0; i < batch_size; ++i) { - dispatch_block_sort(stable_tag, size, stream, d_input, d_output); + HIP_CHECK_LAUNCH(dispatch_block_sort(stable_tag, size, stream, d_input, d_output)); } - HIP_CHECK(hipGetLastError()); // Record stop event and wait until it completes HIP_CHECK(hipEventRecord(stop, stream)); diff --git a/benchmark/benchmark_config_dispatch.cpp b/benchmark/benchmark_config_dispatch.cpp index e1e6eda0c..ee7a52ac0 100644 --- a/benchmark/benchmark_config_dispatch.cpp +++ b/benchmark/benchmark_config_dispatch.cpp @@ -63,8 +63,7 @@ static void BM_kernel_launch(benchmark::State& state) for(auto _ : state) { - hipLaunchKernelGGL(empty_kernel, dim3(1), dim3(1), 0, stream); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH(hipLaunchKernelGGL(empty_kernel, dim3(1), dim3(1), 0, stream)); } HIP_CHECK(hipStreamSynchronize(stream)); } diff --git a/benchmark/benchmark_device_adjacent_find.parallel.hpp b/benchmark/benchmark_device_adjacent_find.parallel.hpp index 36e7fbed2..0b59d9ece 100644 --- a/benchmark/benchmark_device_adjacent_find.parallel.hpp +++ b/benchmark/benchmark_device_adjacent_find.parallel.hpp @@ -188,9 +188,6 @@ struct device_adjacent_find_benchmark : public config_autotune_interface HIP_CHECK(hipEventRecord(stop, stream)); HIP_CHECK(hipEventSynchronize(stop)); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); - float elapsed_mseconds; HIP_CHECK(hipEventElapsedTime(&elapsed_mseconds, start, stop)); state.SetIterationTime(elapsed_mseconds / 1000); diff --git a/benchmark/benchmark_warp_reduce.cpp b/benchmark/benchmark_warp_reduce.cpp index 87a909225..3213175f2 100644 --- a/benchmark/benchmark_warp_reduce.cpp +++ b/benchmark/benchmark_warp_reduce.cpp @@ -95,14 +95,16 @@ inline auto execute_warp_reduce_kernel( T* input, T* output, Flag* /* flags */, size_t size, hipStream_t stream) -> typename std::enable_if::type { - hipLaunchKernelGGL(HIP_KERNEL_NAME(warp_reduce_kernel), - dim3(size / BlockSize), - dim3(BlockSize), - 0, - stream, - input, - output); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(warp_reduce_kernel), + dim3(size / BlockSize), + dim3(BlockSize), + 0, + stream, + input, + output + ) + ); } template typename std::enable_if::type { - hipLaunchKernelGGL(HIP_KERNEL_NAME(segmented_warp_reduce_kernel), - dim3(size / BlockSize), - dim3(BlockSize), - 0, - stream, - input, - flags, - output); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(segmented_warp_reduce_kernel), + dim3(size / BlockSize), + dim3(BlockSize), + 0, + stream, + input, + flags, + output + ) + ); } template), - dim3(size / items_per_block), - dim3(BlockSize), - 0, - stream, - d_input_key, - d_input_value, - d_output_key, - d_output_value); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + warp_sort_by_key_kernel), + dim3(size / items_per_block), + dim3(BlockSize), + 0, + stream, + d_input_key, + d_input_value, + d_output_key, + d_output_value + ) + ); } } else @@ -169,17 +172,19 @@ void run_benchmark(benchmark::State& state, ROCPRIM_NO_UNROLL for(unsigned int trial = 0; trial < Trials; ++trial) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_sort_kernel), - dim3(size / items_per_block), - dim3(BlockSize), - 0, - stream, - d_input_key, - d_output_key); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_sort_kernel), + dim3(size / items_per_block), + dim3(BlockSize), + 0, + stream, + d_input_key, + d_output_key + ) + ); } } - HIP_CHECK(hipGetLastError()); // Record stop event and wait until it completes HIP_CHECK(hipEventRecord(stop, stream)); diff --git a/common/utils.hpp b/common/utils.hpp index 6b4de63e9..c21380a8f 100644 --- a/common/utils.hpp +++ b/common/utils.hpp @@ -25,33 +25,71 @@ #include +// This macro is used to print error messages generated by HIP_CHECK, below. +// If we're using GTest, FAIL is called to log the Google Test trace. +// The lambda is invoked immediately as assertions that generate a fatal failure can +// only be used in void-returning functions. #ifdef USE_GTEST - // GoogleTest-compatible HIP_CHECK macro. FAIL is called to log the Google Test trace. - // The lambda is invoked immediately as assertions that generate a fatal failure can - // only be used in void-returning functions. - #define HIP_CHECK(condition) \ - { \ - hipError_t error = condition; \ - if(error != hipSuccess) \ - { \ - [error]() \ - { FAIL() << "HIP error " << error << ": " << hipGetErrorString(error); }(); \ - exit(error); \ - } \ - } + #define PRINT_ERROR(msg) \ + { \ + const std::string err = msg; \ + [err]() { FAIL() << err; }(); \ + } #else - #define HIP_CHECK(condition) \ - { \ - hipError_t error = condition; \ - if(error != hipSuccess) \ - { \ - std::cout << "HIP error: " << hipGetErrorString(error) << " file: " << __FILE__ \ - << " line: " << __LINE__ << std::endl; \ - exit(error); \ - } \ - } + #define PRINT_ERROR(msg) \ + { \ + const std::string err = msg; \ + std::cout << err; \ + } #endif +// GoogleTest-compatible HIP_CHECK macro. FAIL is called to log the Google Test trace. +// The lambda is invoked immediately as assertions that generate a fatal failure can +// only be used in void-returning functions. +#define HIP_CHECK(condition) \ + { \ + (void) hipGetLastError(); \ + hipError_t ret_error = condition; \ + hipError_t hip_error = hipGetLastError(); \ + \ + if (ret_error != hipSuccess || hip_error != hipSuccess) \ + { \ + std::ostringstream err_stream; \ + if (ret_error != hipSuccess) \ + { \ + err_stream << "Call returned " << ret_error << ": " \ + << hipGetErrorString(ret_error) << std::endl; \ + } \ + if (hip_error != hipSuccess && hip_error != ret_error) \ + { \ + err_stream << "HIP error " << hip_error << ": " \ + << hipGetErrorString(hip_error) << std::endl; \ + } \ + err_stream << "File: " << __FILE__ << " line: " << __LINE__; \ + const std::string err_msg = err_stream.str(); \ + PRINT_ERROR(err_msg); \ + exit((ret_error != hipSuccess ? ret_error : hip_error)); \ + } \ + } + +// GoogleTest-compatible HIP_CHECK macro that can be used for calls that don't return +// a hipError (eg. kernel launches). +#define HIP_CHECK_LAUNCH(launch) \ + { \ + (void) hipGetLastError(); \ + launch; \ + hipError_t error = hipGetLastError(); \ + if (error != hipSuccess) \ + { \ + std::ostringstream err_stream; \ + err_stream << "HIP error: " << hipGetErrorString(error) << std::endl \ + << "File: " << __FILE__ << " line: " << __LINE__; }(); \ + const std::string err_msg = err_stream.str(); \ + PRINT_ERROR(err_msg); \ + exit(error); \ + } \ + } + namespace common { template diff --git a/test/common_test_header.hpp b/test/common_test_header.hpp index 02a8d17ef..e46fe17d1 100755 --- a/test/common_test_header.hpp +++ b/test/common_test_header.hpp @@ -35,6 +35,7 @@ #include #include #include +#include #include "../common/utils.hpp" @@ -45,18 +46,22 @@ #include #include +// GoogleTest-compatible HIP_CHECK macro that can be used for calls that allocate memory within +// a loop. If condition fails with a hipErrorOutOfMemory, the loop iteration is skipped. #define HIP_CHECK_MEMORY(condition) \ { \ + (void) hipGetLastError(); \ hipError_t error = condition; \ - if(error == hipErrorOutOfMemory) \ + if (error == hipErrorOutOfMemory) \ { \ std::cout << "Out of memory. Skipping size = " << size << std::endl; \ + (void) hipGetLastError(); \ break; \ } \ - if(error != hipSuccess) \ + if (error != hipSuccess) \ { \ - std::cout << "HIP error: " << hipGetErrorString(error) << " line: " << __LINE__ \ - << std::endl; \ + [error]() { FAIL() << "HIP error: " << hipGetErrorString(error) \ + << " line: " << __LINE__; }(); \ exit(error); \ } \ } diff --git a/test/hip/test_hip_api.cpp b/test/hip/test_hip_api.cpp index bd7c6c469..fe676cb8b 100644 --- a/test/hip/test_hip_api.cpp +++ b/test/hip/test_hip_api.cpp @@ -57,16 +57,18 @@ TEST(HIPTests, Saxpy) common::device_ptr d_x(x); common::device_ptr d_y(y); - hipLaunchKernelGGL(HIP_KERNEL_NAME(saxpy_kernel), - dim3((N + 255) / 256), - dim3(256), - 0, - 0, - d_x.get(), - d_y.get(), - a, - N); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(saxpy_kernel), + dim3((N + 255) / 256), + dim3(256), + 0, + 0, + d_x.get(), + d_y.get(), + a, + N + ) + ); y = d_y.load(); diff --git a/test/hip/test_hip_async_copy.cpp b/test/hip/test_hip_async_copy.cpp index c0fbd233f..5f89bc585 100644 --- a/test/hip/test_hip_async_copy.cpp +++ b/test/hip/test_hip_async_copy.cpp @@ -166,14 +166,16 @@ TEST_F(HipAsyncCopyTests, AsyncCopyDepthFirst) const unsigned int grid_size = (sizes[i] + block_size - 1) / block_size; if(sizes[i] > 0) { - hipLaunchKernelGGL(HIP_KERNEL_NAME(increment_kernel), + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(increment_kernel), dim3(grid_size), dim3(block_size), 0, streams[i], d_inputs[i].get(), - sizes[i]); - HIP_CHECK(hipGetLastError()); + sizes[i] + ) + ); } HIP_CHECK(hipMemcpyAsync(outputs[i].data(), d_inputs[i].get(), diff --git a/test/hip/test_ordered_block_id.cpp b/test/hip/test_ordered_block_id.cpp index d452d408a..581677b20 100644 --- a/test/hip/test_ordered_block_id.cpp +++ b/test/hip/test_ordered_block_id.cpp @@ -53,9 +53,17 @@ bool test_func(int block_count, int thread_count) { common::device_ptr d_flags(block_count); - test_kernel<<>>(d_flags.get()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + test_kernel, + dim3(block_count), + dim3(thread_count), + 0, + hipStreamDefault, + d_flags.get() + ) + ); - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); auto h_vec = d_flags.load(); diff --git a/test/rocprim/detail/get_rocprim_version.cpp b/test/rocprim/detail/get_rocprim_version.cpp index 8b99a4604..d74aba5ab 100644 --- a/test/rocprim/detail/get_rocprim_version.cpp +++ b/test/rocprim/detail/get_rocprim_version.cpp @@ -37,9 +37,16 @@ unsigned int get_rocprim_version_on_device() { common::device_ptr d_version(1); - hipLaunchKernelGGL(get_version_kernel, dim3(1), dim3(1), 0, 0, d_version.get()); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + get_version_kernel, + dim3(1), + dim3(1), + 0, + 0, + d_version.get() + ) + ); return d_version.load_value_at(0); } diff --git a/test/rocprim/internal/test_internal_merge_path.cpp b/test/rocprim/internal/test_internal_merge_path.cpp index bddaeeea7..972b5e04d 100644 --- a/test/rocprim/internal/test_internal_merge_path.cpp +++ b/test/rocprim/internal/test_internal_merge_path.cpp @@ -29,9 +29,18 @@ void serial_merge(std::vector& input, common::device_ptr device_data(input); - merge_kernel - <<<1, 1>>>(device_data.get(), rocprim::detail::range_t<>{0, mid, mid, N}, compare_function); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(merge_kernel), + dim3(1), + dim3(1), + 0, + 0, + device_data.get(), + rocprim::detail::range_t<>{0, mid, mid, N}, + compare_function + ) + ); output = device_data.load(); } diff --git a/test/rocprim/test_arg_index_iterator.cpp b/test/rocprim/test_arg_index_iterator.cpp index 9da6fa260..8bf2283a0 100644 --- a/test/rocprim/test_arg_index_iterator.cpp +++ b/test/rocprim/test_arg_index_iterator.cpp @@ -162,7 +162,6 @@ TYPED_TEST(RocprimArgIndexIteratorTests, ReduceArgMinimum) reduce_op, stream, debug_synchronous)); - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host diff --git a/test/rocprim/test_block_adjacent_difference.kernels.hpp b/test/rocprim/test_block_adjacent_difference.kernels.hpp index 6b5cd6f3b..97f707458 100644 --- a/test/rocprim/test_block_adjacent_difference.kernels.hpp +++ b/test/rocprim/test_block_adjacent_difference.kernels.hpp @@ -439,17 +439,18 @@ auto test_block_adjacent_difference() -> typename std::enable_if::t common::device_ptr device_heads(size); // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - flag_heads_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_input.get(), - device_heads.get()); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + flag_heads_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_input.get(), + device_heads.get() + ) + ); // Reading results const auto heads = device_heads.load(); @@ -541,17 +542,18 @@ auto test_block_adjacent_difference() -> typename std::enable_if::t common::device_ptr device_tails(size); // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - flag_tails_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_input.get(), - device_tails.get()); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + flag_tails_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_input.get(), + device_tails.get() + ) + ); // Reading results const auto tails = device_tails.load(); @@ -659,20 +661,21 @@ auto test_block_adjacent_difference() -> typename std::enable_if::t common::device_ptr device_tails(size); // Running kernel - hipLaunchKernelGGL(HIP_KERNEL_NAME(flag_heads_and_tails_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_input.get(), - device_heads.get(), - device_tails.get()); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(flag_heads_and_tails_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_input.get(), + device_heads.get(), + device_tails.get() + ) + ); // Reading results const auto heads = device_heads.load(); @@ -749,19 +752,21 @@ auto test_block_adjacent_difference() -> typename std::enable_if::t common::device_ptr d_output(size); // Running kernel - hipLaunchKernelGGL(HIP_KERNEL_NAME(subtract_left_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - d_input.get(), - d_output.get()); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(subtract_left_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + d_input.get(), + d_output.get() + ) + ); // Reading results const auto output = d_output.load(); @@ -836,19 +841,21 @@ auto test_block_adjacent_difference() -> typename std::enable_if::t common::device_ptr d_output(size); // Running kernel - hipLaunchKernelGGL(HIP_KERNEL_NAME(subtract_right_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - d_input.get(), - d_output.get()); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(subtract_right_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + d_input.get(), + d_output.get() + ) + ); // Reading results const auto output = d_output.load(); @@ -937,20 +944,22 @@ auto test_block_adjacent_difference() -> typename std::enable_if::t common::device_ptr d_output(size); // Running kernel - hipLaunchKernelGGL(HIP_KERNEL_NAME(subtract_left_partial_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - d_input.get(), - d_tile_sizes.get(), - d_output.get()); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(subtract_left_partial_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + d_input.get(), + d_tile_sizes.get(), + d_output.get() + ) + ); // Reading results const auto output = d_output.load(); @@ -1039,20 +1048,22 @@ auto test_block_adjacent_difference() -> typename std::enable_if::t common::device_ptr d_output(size); // Running kernel - hipLaunchKernelGGL(HIP_KERNEL_NAME(subtract_right_partial_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - d_input.get(), - d_tile_sizes.get(), - d_output.get()); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(subtract_right_partial_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + d_input.get(), + d_tile_sizes.get(), + d_output.get() + ) + ); // Reading results const auto output = d_output.load(); diff --git a/test/rocprim/test_block_discontinuity.kernels.hpp b/test/rocprim/test_block_discontinuity.kernels.hpp index 021de9af3..9c7eca251 100644 --- a/test/rocprim/test_block_discontinuity.kernels.hpp +++ b/test/rocprim/test_block_discontinuity.kernels.hpp @@ -263,17 +263,18 @@ auto test_block_discontinuity() common::device_ptr device_heads(size); // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - flag_heads_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_input.get(), - device_heads.get()); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + flag_heads_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_input.get(), + device_heads.get() + ) + ); // Reading results const auto heads = device_heads.load_to_unique_ptr(); @@ -349,17 +350,18 @@ auto test_block_discontinuity() common::device_ptr device_tails(size); // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - flag_tails_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_input.get(), - device_tails.get()); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + flag_tails_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_input.get(), + device_tails.get() + ) + ); const auto tails = device_tails.load_to_unique_ptr(); test_utils::assert_eq(tails.get(), @@ -447,20 +449,21 @@ auto test_block_discontinuity() common::device_ptr device_tails(size); // Running kernel - hipLaunchKernelGGL(HIP_KERNEL_NAME(flag_heads_and_tails_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_input.get(), - device_heads.get(), - device_tails.get()); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(flag_heads_and_tails_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_input.get(), + device_heads.get(), + device_tails.get() + ) + ); const auto heads = device_heads.load_to_unique_ptr(); const auto tails = device_tails.load_to_unique_ptr(); diff --git a/test/rocprim/test_block_exchange.kernels.hpp b/test/rocprim/test_block_exchange.kernels.hpp index 9cc5bef9b..160c76809 100644 --- a/test/rocprim/test_block_exchange.kernels.hpp +++ b/test/rocprim/test_block_exchange.kernels.hpp @@ -249,17 +249,18 @@ auto test_block_exchange(int /*device_id*/) -> typename std::enable_if), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_input.get(), - device_output.get()); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + blocked_to_striped_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_input.get(), + device_output.get() + ) + ); // Reading results const auto output = device_output.load(); @@ -321,17 +322,18 @@ auto test_block_exchange(int /*device_id*/) -> typename std::enable_if), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_input.get(), - device_output.get()); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + striped_to_blocked_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_input.get(), + device_output.get() + ) + ); // Reading results const auto output = device_output.load(); @@ -406,17 +408,18 @@ auto test_block_exchange(int device_id) -> typename std::enable_if: // Running kernel constexpr unsigned int grid_size = (size / items_per_block); - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - blocked_to_warp_striped_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_input.get(), - device_output.get()); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + blocked_to_warp_striped_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_input.get(), + device_output.get() + ) + ); // Reading results const auto output = device_output.load(); @@ -491,17 +494,18 @@ auto test_block_exchange(int device_id) -> typename std::enable_if: // Running kernel constexpr unsigned int grid_size = (size / items_per_block); - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - warp_striped_to_blocked_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_input.get(), - device_output.get()); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + warp_striped_to_blocked_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_input.get(), + device_output.get() + ) + ); // Reading results const auto output = device_output.load(); @@ -572,18 +576,19 @@ auto test_block_exchange(int /*device_id*/) -> typename std::enable_if), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_input.get(), - device_output.get(), - device_ranks.get()); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + scatter_to_blocked_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_input.get(), + device_output.get(), + device_ranks.get() + ) + ); // Reading results const auto output = device_output.load(); @@ -656,18 +661,19 @@ auto test_block_exchange(int /*device_id*/) -> typename std::enable_if), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_input.get(), - device_output.get(), - device_ranks.get()); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + scatter_to_striped_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_input.get(), + device_output.get(), + device_ranks.get() + ) + ); // Reading results const auto output = device_output.load(); diff --git a/test/rocprim/test_block_histogram.kernels.hpp b/test/rocprim/test_block_histogram.kernels.hpp index 8351ec426..460c9dec9 100644 --- a/test/rocprim/test_block_histogram.kernels.hpp +++ b/test/rocprim/test_block_histogram.kernels.hpp @@ -168,16 +168,18 @@ void test_block_histogram_input_arrays() common::device_ptr device_output_bin(output_bin); // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - histogram_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_output.get(), - device_output_bin.get()); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + histogram_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_output.get(), + device_output_bin.get() + ) + ); // Reading results back output_bin = device_output_bin.load(); diff --git a/test/rocprim/test_block_load_store.hpp b/test/rocprim/test_block_load_store.hpp index 382a8061f..b48250481 100644 --- a/test/rocprim/test_block_load_store.hpp +++ b/test/rocprim/test_block_load_store.hpp @@ -85,16 +85,18 @@ typed_test_def(suite_name, name_suffix, LoadStoreClass) common::device_ptr device_output(size); // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - load_store_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_input.get(), - device_output.get()); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + load_store_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_input.get(), + device_output.get() + ) + ); // Reading results from device const auto output = device_output.load(); @@ -169,19 +171,21 @@ typed_test_def(suite_name, name_suffix, LoadStoreClassValid) common::device_ptr device_output(output); // Running kernel - hipLaunchKernelGGL(HIP_KERNEL_NAME(load_store_valid_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_input.get(), - device_output.get(), - valid); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(load_store_valid_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_input.get(), + device_output.get(), + valid + ) + ); // Reading results from device output = device_output.load(); @@ -255,20 +259,22 @@ typed_test_def(suite_name, name_suffix, LoadStoreClassDefault) common::device_ptr device_output(size); // Running kernel - hipLaunchKernelGGL(HIP_KERNEL_NAME(load_store_valid_default_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_input.get(), - device_output.get(), - valid, - _default); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(load_store_valid_default_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_input.get(), + device_output.get(), + valid, + _default + ) + ); // Reading results from device const auto output = device_output.load(); diff --git a/test/rocprim/test_block_radix_rank.hpp b/test/rocprim/test_block_radix_rank.hpp index e078fdcfa..12f4bf04a 100644 --- a/test/rocprim/test_block_radix_rank.hpp +++ b/test/rocprim/test_block_radix_rank.hpp @@ -195,19 +195,21 @@ void test_block_radix_rank() common::device_ptr d_ranks_output(size); // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - rank_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - d_keys_input.get(), - d_ranks_output.get(), - descending, - start_bit, - radix_bits); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + rank_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + d_keys_input.get(), + d_ranks_output.get(), + descending, + start_bit, + radix_bits + ) + ); // Getting results to host auto ranks_output = d_ranks_output.load(); diff --git a/test/rocprim/test_block_radix_sort.kernels.hpp b/test/rocprim/test_block_radix_sort.kernels.hpp index aa18c690e..2250a977c 100644 --- a/test/rocprim/test_block_radix_sort.kernels.hpp +++ b/test/rocprim/test_block_radix_sort.kernels.hpp @@ -243,13 +243,20 @@ auto test_block_radix_sort() -> typename std::enable_if::type // Preparing device common::device_ptr device_keys_output(keys_output, size); - sort_key_kernel - <<>>(device_keys_output.get(), - to_striped, - descending, - start_bit, - end_bit); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(sort_key_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_keys_output.get(), + to_striped, + descending, + start_bit, + end_bit + ) + ); // Getting results to host keys_output = device_keys_output.load_to_unique_ptr(); @@ -349,7 +356,7 @@ auto test_block_radix_sort() -> typename std::enable_if::type common::device_ptr device_values_output(values_output); // Running kernel - sort_key_value_kernel typename std::enable_if::type to_striped, descending, start_bit, - end_bit); - HIP_CHECK(hipGetLastError()); + end_bit + ) + )); // Getting results to host keys_output = device_keys_output.load_to_unique_ptr(); diff --git a/test/rocprim/test_block_reduce.kernels.hpp b/test/rocprim/test_block_reduce.kernels.hpp index 58fe3256f..2b641310f 100644 --- a/test/rocprim/test_block_reduce.kernels.hpp +++ b/test/rocprim/test_block_reduce.kernels.hpp @@ -102,12 +102,13 @@ struct static_run_algo ); // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(reduce_kernel), - dim3(grid_size), dim3(BlockSize), 0, 0, - device_output, device_output_reductions + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(reduce_kernel), + dim3(grid_size), dim3(BlockSize), 0, 0, + device_output, device_output_reductions + ) ); - HIP_CHECK(hipGetLastError()); // Reading results back HIP_CHECK( @@ -175,12 +176,13 @@ struct static_run_valid ); // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(reduce_valid_kernel), - dim3(grid_size), dim3(BlockSize), 0, 0, - device_output, device_output_reductions, valid_items + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(reduce_valid_kernel), + dim3(grid_size), dim3(BlockSize), 0, 0, + device_output, device_output_reductions, valid_items + ) ); - HIP_CHECK(hipGetLastError()); // Reading results back HIP_CHECK( @@ -286,16 +288,18 @@ void test_block_reduce_input_arrays() common::device_ptr device_output_reductions(output_reductions); // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - reduce_array_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_output.get(), - device_output_reductions.get()); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + reduce_array_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_output.get(), + device_output_reductions.get() + ) + ); // Reading results back output_reductions = device_output_reductions.load(); diff --git a/test/rocprim/test_block_run_length_decode.cpp b/test/rocprim/test_block_run_length_decode.cpp index 6470ab3a2..58bcedd58 100644 --- a/test/rocprim/test_block_run_length_decode.cpp +++ b/test/rocprim/test_block_run_length_decode.cpp @@ -233,18 +233,23 @@ TYPED_TEST(HipcubBlockRunLengthDecodeTest, TestDecode) common::device_ptr d_decoded_runs(expected.size()); common::device_ptr d_decoded_offsets(expected.size()); - block_run_length_decode_kernel - <<>>(d_run_items.get(), - d_run_lengths.get(), - d_decoded_runs.get(), - d_decoded_offsets.get()); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); + decoded_items_per_thread>), + dim3(1), + dim3(block_size), + 0, + 0, + d_run_items.get(), + d_run_lengths.get(), + d_decoded_runs.get(), + d_decoded_offsets.get() + ) + ); std::vector output = d_decoded_runs.load(); std::vector offsets = d_decoded_offsets.load(); diff --git a/test/rocprim/test_block_scan.kernels.hpp b/test/rocprim/test_block_scan.kernels.hpp index 4eca2c418..8ea8fc0b1 100644 --- a/test/rocprim/test_block_scan.kernels.hpp +++ b/test/rocprim/test_block_scan.kernels.hpp @@ -255,12 +255,13 @@ struct static_run_algo ); // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(scan_kernel), - dim3(grid_size), dim3(BlockSize), 0, 0, - device_output, device_output_b, init + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(scan_kernel), + dim3(grid_size), dim3(BlockSize), 0, 0, + device_output, device_output_b, init + ) ); - HIP_CHECK(hipGetLastError()); // Reading results back HIP_CHECK( @@ -566,19 +567,19 @@ auto test_block_scan_input_arrays() common::device_ptr device_output(output); // Launching kernel - hipLaunchKernelGGL(HIP_KERNEL_NAME(inclusive_scan_array_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_output.get()); - - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(inclusive_scan_array_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_output.get() + ) + ); // Read from device memory output = device_output.load(); @@ -649,20 +650,20 @@ auto test_block_scan_input_arrays() common::device_ptr device_output_reductions(output_reductions); // Launching kernel - hipLaunchKernelGGL(HIP_KERNEL_NAME(inclusive_scan_reduce_array_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_output.get(), - device_output_reductions.get()); - - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(inclusive_scan_reduce_array_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_output.get(), + device_output_reductions.get() + ) + ); // Read from device memory output = device_output.load(); @@ -735,22 +736,22 @@ auto test_block_scan_input_arrays() common::device_ptr device_output_bp(output_block_prefixes); // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(inclusive_scan_array_prefix_callback_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_output.get(), - device_output_bp.get(), - block_prefix); - - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(inclusive_scan_array_prefix_callback_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_output.get(), + device_output_bp.get(), + block_prefix + ) + ); // Read from device memory output = device_output.load(); @@ -820,20 +821,20 @@ auto test_block_scan_input_arrays() common::device_ptr device_output(output); // Launching kernel - hipLaunchKernelGGL(HIP_KERNEL_NAME(exclusive_scan_array_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_output.get(), - init); - - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(exclusive_scan_array_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_output.get(), + init + ) + ); // Read from device memory output = device_output.load(); @@ -910,21 +911,21 @@ auto test_block_scan_input_arrays() common::device_ptr device_output_reductions(output_reductions.size()); // Launching kernel - hipLaunchKernelGGL(HIP_KERNEL_NAME(exclusive_scan_reduce_array_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_output.get(), - device_output_reductions.get(), - init); - - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(exclusive_scan_reduce_array_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_output.get(), + device_output_reductions.get(), + init + ) + ); // Read from device memory output = device_output.load(); @@ -1002,22 +1003,22 @@ auto test_block_scan_input_arrays() common::device_ptr device_output_bp(output_block_prefixes.size()); // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(exclusive_scan_prefix_callback_array_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_output.get(), - device_output_bp.get(), - block_prefix); - - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(exclusive_scan_prefix_callback_array_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_output.get(), + device_output_bp.get(), + block_prefix + ) + ); // Read from device memory output = device_output.load(); diff --git a/test/rocprim/test_block_shuffle.hpp b/test/rocprim/test_block_shuffle.hpp index 670902242..1771c11bb 100644 --- a/test/rocprim/test_block_shuffle.hpp +++ b/test/rocprim/test_block_shuffle.hpp @@ -61,15 +61,17 @@ typed_test_def(suite_name, name_suffix, BlockOffset) common::device_ptr device_output(input_data.size()); // Running kernel - hipLaunchKernelGGL(HIP_KERNEL_NAME(shuffle_offset_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_input.get(), - device_output.get(), - distance); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(shuffle_offset_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_input.get(), + device_output.get(), + distance + ) + ); // Reading results back const auto output_data = device_output.load(); @@ -115,15 +117,17 @@ typed_test_def(suite_name, name_suffix, BlockRotate) common::device_ptr device_output(input_data.size()); // Running kernel - hipLaunchKernelGGL(HIP_KERNEL_NAME(shuffle_rotate_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_input.get(), - device_output.get(), - distance); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(shuffle_rotate_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_input.get(), + device_output.get(), + distance + ) + ); // Reading results back const auto output_data = device_output.load(); @@ -173,14 +177,16 @@ typed_test_def(suite_name, name_suffix, BlockUp) common::device_ptr device_output(input_data.size()); // Running kernel - hipLaunchKernelGGL(HIP_KERNEL_NAME(shuffle_up_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_input.get(), - device_output.get()); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(shuffle_up_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_input.get(), + device_output.get() + ) + ); // Reading results back const auto output_data = device_output.load(); @@ -235,14 +241,16 @@ typed_test_def(suite_name, name_suffix, BlockDown) common::device_ptr device_output(input_data.size()); // Running kernel - hipLaunchKernelGGL(HIP_KERNEL_NAME(shuffle_down_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_input.get(), - device_output.get()); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(shuffle_down_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_input.get(), + device_output.get() + ) + ); // Reading results back const auto output_data = device_output.load(); diff --git a/test/rocprim/test_block_sort.hpp b/test/rocprim/test_block_sort.hpp index 89fa0fde6..f40655e00 100644 --- a/test/rocprim/test_block_sort.hpp +++ b/test/rocprim/test_block_sort.hpp @@ -114,20 +114,22 @@ void TestSortKeyValue() // Running kernel, ignored if invalid size if(size > 0) { - hipLaunchKernelGGL(HIP_KERNEL_NAME(sort_pairs_kernel), - dim3(grid_size), - dim3(block_size), - 0, - stream, - device_key_output.get(), - device_value_output.get(), - size); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(sort_pairs_kernel), + dim3(grid_size), + dim3(block_size), + 0, + stream, + device_key_output.get(), + device_value_output.get(), + size + ) + ); } // Reading results back diff --git a/test/rocprim/test_config_dispatch.cpp b/test/rocprim/test_config_dispatch.cpp index 6697bf10f..8a28d6910 100644 --- a/test/rocprim/test_config_dispatch.cpp +++ b/test/rocprim/test_config_dispatch.cpp @@ -75,8 +75,16 @@ TEST(RocprimConfigDispatchTests, HostMatchesDevice) common::device_ptr device_arch_ptr(1); - hipLaunchKernelGGL(write_target_arch, dim3(1), dim3(1), 0, stream, device_arch_ptr.get()); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + write_target_arch, + dim3(1), + dim3(1), + 0, + stream, + device_arch_ptr.get() + ) + ); const auto device_arch = device_arch_ptr.load_value_at(0); diff --git a/test/rocprim/test_constant_iterator.cpp b/test/rocprim/test_constant_iterator.cpp index 9c56976aa..37c6e4b88 100644 --- a/test/rocprim/test_constant_iterator.cpp +++ b/test/rocprim/test_constant_iterator.cpp @@ -106,7 +106,6 @@ TYPED_TEST(RocprimConstantIteratorTests, Transform) transform(), stream, debug_synchronous)); - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host diff --git a/test/rocprim/test_counting_iterator.cpp b/test/rocprim/test_counting_iterator.cpp index 41f9fbbea..493d7f714 100644 --- a/test/rocprim/test_counting_iterator.cpp +++ b/test/rocprim/test_counting_iterator.cpp @@ -112,7 +112,6 @@ TYPED_TEST(RocprimCountingIteratorTests, Transform) transform(), stream, debug_synchronous)); - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host diff --git a/test/rocprim/test_device_adjacent_difference.cpp b/test/rocprim/test_device_adjacent_difference.cpp index 1e6880332..c35eecbf2 100644 --- a/test/rocprim/test_device_adjacent_difference.cpp +++ b/test/rocprim/test_device_adjacent_difference.cpp @@ -298,7 +298,6 @@ TYPED_TEST(RocprimDeviceAdjacentDifferenceTests, AdjacentDifference) rocprim::minus<>{}, stream, debug_synchronous)); - HIP_CHECK(hipGetLastError()); if(TestFixture::use_graphs) { diff --git a/test/rocprim/test_device_adjacent_find.cpp b/test/rocprim/test_device_adjacent_find.cpp index e840e242b..51a26c4d7 100644 --- a/test/rocprim/test_device_adjacent_find.cpp +++ b/test/rocprim/test_device_adjacent_find.cpp @@ -205,7 +205,6 @@ TYPED_TEST(RocprimDeviceAdjacentFindTests, AdjacentFind) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Allocate memory for output and copy to host side diff --git a/test/rocprim/test_device_find_end.cpp b/test/rocprim/test_device_find_end.cpp index 268cbc8bc..331395117 100644 --- a/test/rocprim/test_device_find_end.cpp +++ b/test/rocprim/test_device_find_end.cpp @@ -253,7 +253,6 @@ TYPED_TEST(RocprimDeviceFindEndTests, FindEnd) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host @@ -393,7 +392,6 @@ TYPED_TEST(RocprimDeviceFindEndTests, FindEndRepetition) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host diff --git a/test/rocprim/test_device_find_first_of.cpp b/test/rocprim/test_device_find_first_of.cpp index ce4b7a8bf..589ac5c95 100644 --- a/test/rocprim/test_device_find_first_of.cpp +++ b/test/rocprim/test_device_find_first_of.cpp @@ -267,7 +267,6 @@ TYPED_TEST(RocprimDeviceFindFirstOfTests, FindFirstOf) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host @@ -357,7 +356,6 @@ TEST(RocprimDeviceFindFirstOfTests, LargeIndices) stream, debug_synchronous)); - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host and check diff --git a/test/rocprim/test_device_merge.cpp b/test/rocprim/test_device_merge.cpp index d5a6ca088..d2c4bcb95 100644 --- a/test/rocprim/test_device_merge.cpp +++ b/test/rocprim/test_device_merge.cpp @@ -232,7 +232,6 @@ TYPED_TEST(RocprimDeviceMergeTests, MergeKey) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); ASSERT_FALSE(out_of_bounds.get()); @@ -398,7 +397,6 @@ TYPED_TEST(RocprimDeviceMergeTests, MergeKeyValue) gHelper.createAndLaunchGraph(stream, true, false); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); ASSERT_FALSE(out_of_bounds.get()); diff --git a/test/rocprim/test_device_merge_sort.cpp b/test/rocprim/test_device_merge_sort.cpp index 4b315d4a3..398d0d39e 100644 --- a/test/rocprim/test_device_merge_sort.cpp +++ b/test/rocprim/test_device_merge_sort.cpp @@ -198,7 +198,6 @@ TYPED_TEST(RocprimDeviceSortTests, SortKey) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host @@ -327,7 +326,6 @@ TYPED_TEST(RocprimDeviceSortTests, SortKeyValue) gHelper.createAndLaunchGraph(stream, true, false); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host diff --git a/test/rocprim/test_device_nth_element.cpp b/test/rocprim/test_device_nth_element.cpp index 672d49e26..65979701b 100644 --- a/test/rocprim/test_device_nth_element.cpp +++ b/test/rocprim/test_device_nth_element.cpp @@ -311,7 +311,6 @@ TYPED_TEST(RocprimDeviceNthelementTests, NthelementKey) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host @@ -389,8 +388,6 @@ TEST(RocprimNthelementKeySameTests, NthelementKeySame) stream, debug_synchronous)); - HIP_CHECK(hipGetLastError()); - // Copy output to host const auto output = d_input.load(); diff --git a/test/rocprim/test_device_partial_sort.cpp b/test/rocprim/test_device_partial_sort.cpp index 1550513cc..34a4b7e00 100644 --- a/test/rocprim/test_device_partial_sort.cpp +++ b/test/rocprim/test_device_partial_sort.cpp @@ -275,8 +275,6 @@ TYPED_TEST(RocprimDevicePartialSortTests, PartialSort) stream, debug_synchronous)); - HIP_CHECK(hipGetLastError()); - if(TestFixture::use_graphs) { gHelper.createAndLaunchGraph(stream); @@ -471,8 +469,6 @@ TYPED_TEST(RocprimDevicePartialSortTests, PartialSortCopy) stream, debug_synchronous)); - HIP_CHECK(hipGetLastError()); - if(TestFixture::use_graphs) { gHelper.createAndLaunchGraph(stream); diff --git a/test/rocprim/test_device_reduce.cpp b/test/rocprim/test_device_reduce.cpp index 376b6d98c..d7eaef88d 100644 --- a/test/rocprim/test_device_reduce.cpp +++ b/test/rocprim/test_device_reduce.cpp @@ -334,7 +334,6 @@ TYPED_TEST(RocprimDeviceReduceTests, ReduceSum) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host @@ -457,7 +456,6 @@ TYPED_TEST(RocprimDeviceReduceTests, ReduceArgMinimum) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host @@ -543,7 +541,6 @@ void testLargeIndices() gHelper.createAndLaunchGraph(stream, true, false); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host @@ -672,7 +669,6 @@ TYPED_TEST(RocprimDeviceReducePrecisionTests, ReduceSumInputEqualExponentFunctio gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host diff --git a/test/rocprim/test_device_scan.cpp b/test/rocprim/test_device_scan.cpp index efb428b76..301dc432b 100644 --- a/test/rocprim/test_device_scan.cpp +++ b/test/rocprim/test_device_scan.cpp @@ -331,7 +331,6 @@ TYPED_TEST(RocprimDeviceScanTests, InclusiveScanEmptyInput) gHelper.createAndLaunchGraph(stream, true, false); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); ASSERT_FALSE(out_of_bounds.get()); @@ -455,7 +454,6 @@ TYPED_TEST(RocprimDeviceScanTests, InclusiveScan) gHelper.createAndLaunchGraph(stream, true, false); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host @@ -598,7 +596,6 @@ TYPED_TEST(RocprimDeviceScanTests, ExclusiveScan) gHelper.createAndLaunchGraph(stream, true, false); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host @@ -748,7 +745,6 @@ TYPED_TEST(RocprimDeviceScanTests, InclusiveScanByKey) gHelper.createAndLaunchGraph(stream, true, false); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host @@ -897,7 +893,6 @@ TYPED_TEST(RocprimDeviceScanTests, ExclusiveScanByKey) gHelper.createAndLaunchGraph(stream, true, false); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host @@ -1059,7 +1054,6 @@ void testLargeIndicesInclusiveScan() gHelper.createAndLaunchGraph(stream, true, false); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host @@ -1177,7 +1171,6 @@ void testLargeIndicesExclusiveScan() gHelper.createAndLaunchGraph(stream, true, false); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host @@ -1422,8 +1415,6 @@ void large_indices_scan_by_key_test(ScanByKeyFun scan_by_key_fun) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); - const auto incorrect_flag = d_incorrect_flag.load()[0]; ASSERT_EQ(0, incorrect_flag); @@ -1669,7 +1660,6 @@ TYPED_TEST(RocprimDeviceScanFutureTests, ExclusiveScan) scan_op, stream, debug_synchronous))); - HIP_CHECK(hipGetLastError()); if(TestFixture::use_graphs) { diff --git a/test/rocprim/test_device_search.cpp b/test/rocprim/test_device_search.cpp index 7f2e84952..5fa2e572c 100644 --- a/test/rocprim/test_device_search.cpp +++ b/test/rocprim/test_device_search.cpp @@ -252,7 +252,6 @@ TYPED_TEST(RocprimDeviceSearchTests, Search) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host @@ -389,7 +388,6 @@ TYPED_TEST(RocprimDeviceSearchTests, SearchRepetition) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); const auto output = d_output.load()[0]; diff --git a/test/rocprim/test_device_search_n.cpp b/test/rocprim/test_device_search_n.cpp index 04da41d7c..70aa15828 100644 --- a/test/rocprim/test_device_search_n.cpp +++ b/test/rocprim/test_device_search_n.cpp @@ -197,7 +197,6 @@ TYPED_TEST(RocprimDeviceSearchNTests, RandomTest) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamSynchronize(stream)); const auto expected @@ -290,7 +289,6 @@ TYPED_TEST(RocprimDeviceSearchNTests, MaxCount) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamSynchronize(stream)); const auto expected @@ -483,7 +481,6 @@ TYPED_TEST(RocprimDeviceSearchNTests, SmallCount) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamSynchronize(stream)); const auto expected @@ -577,7 +574,6 @@ TYPED_TEST(RocprimDeviceSearchNTests, StartFromBegin) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamSynchronize(stream)); const auto expected @@ -671,7 +667,6 @@ TYPED_TEST(RocprimDeviceSearchNTests, StartFromMiddle) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamSynchronize(stream)); const auto expected @@ -765,7 +760,6 @@ TYPED_TEST(RocprimDeviceSearchNTests, StartFromEnd) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamSynchronize(stream)); const auto expected @@ -863,7 +857,6 @@ TYPED_TEST(RocprimDeviceSearchNTests, StartFromEndButFail) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamSynchronize(stream)); const auto expected @@ -983,7 +976,6 @@ TYPED_TEST(RocprimDeviceSearchNTests, NoiseTest_1block) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamSynchronize(stream)); const auto expected @@ -1103,7 +1095,6 @@ TYPED_TEST(RocprimDeviceSearchNTests, NoiseTest_2block) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamSynchronize(stream)); const auto expected @@ -1223,7 +1214,6 @@ TYPED_TEST(RocprimDeviceSearchNTests, NoiseTest_3block) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamSynchronize(stream)); const auto expected @@ -1345,7 +1335,6 @@ TYPED_TEST(RocprimDeviceSearchNTests, MultiResult1) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamSynchronize(stream)); const auto expected @@ -1460,7 +1449,6 @@ TYPED_TEST(RocprimDeviceSearchNTests, MultiResult2) gHelper.createAndLaunchGraph(stream); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamSynchronize(stream)); const auto expected diff --git a/test/rocprim/test_device_segmented_reduce.cpp b/test/rocprim/test_device_segmented_reduce.cpp index e2a4889b9..b15cf7da9 100644 --- a/test/rocprim/test_device_segmented_reduce.cpp +++ b/test/rocprim/test_device_segmented_reduce.cpp @@ -438,7 +438,6 @@ void testLargeIndices() gHelper.createAndLaunchGraph(stream, true, false); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host diff --git a/test/rocprim/test_device_transform.cpp b/test/rocprim/test_device_transform.cpp index 47c4052de..bd2c69a45 100644 --- a/test/rocprim/test_device_transform.cpp +++ b/test/rocprim/test_device_transform.cpp @@ -186,7 +186,6 @@ TYPED_TEST(RocprimDeviceTransformTests, Transform) gHelper.createAndLaunchGraph(stream, true, false); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host @@ -285,7 +284,6 @@ TYPED_TEST(RocprimDeviceTransformTests, BinaryTransform) gHelper.createAndLaunchGraph(stream, true, false); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host @@ -389,7 +387,6 @@ void testLargeIndices() gHelper.createAndLaunchGraph(stream, true, false); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host diff --git a/test/rocprim/test_intrinsics.cpp b/test/rocprim/test_intrinsics.cpp index 9b4fddede..420786739 100644 --- a/test/rocprim/test_intrinsics.cpp +++ b/test/rocprim/test_intrinsics.cpp @@ -367,17 +367,18 @@ void test_shuffle() d_data.store(input); // Launching kernel - hipLaunchKernelGGL(HIP_KERNEL_NAME(shuffle_kernel), - dim3(1), - dim3(hardware_warp_size), - 0, - 0, - d_data.get(), - delta, - logical_warp_size, - active_lanes); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(shuffle_kernel), + dim3(1), + dim3(hardware_warp_size), + 0, + 0, + d_data.get(), + delta, + logical_warp_size, + active_lanes + ) + ); // Read from device memory const auto output = d_data.load(); @@ -467,16 +468,17 @@ TYPED_TEST(RocprimIntrinsicsTests, ShuffleIndex) device_src_lanes.store(src_lanes); // Launching kernel - hipLaunchKernelGGL(HIP_KERNEL_NAME(shuffle_index_kernel), - dim3(1), - dim3(hardware_warp_size), - 0, - 0, - device_data.get(), - device_src_lanes.get(), - logical_warp_size); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(shuffle_index_kernel), + dim3(1), + dim3(hardware_warp_size), + 0, + 0, + device_data.get(), + device_src_lanes.get(), + logical_warp_size + ) + ); // Read from device memory const auto output = device_data.load(); @@ -513,13 +515,16 @@ TEST(RocprimIntrinsicsTests, LaneId) common::device_ptr d_output(size); - hipLaunchKernelGGL(lane_id_kernel, - dim3(blocks), - dim3(block_size), - 0, - hipStreamDefault, - d_output.get()); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + lane_id_kernel, + dim3(blocks), + dim3(block_size), + 0, + hipStreamDefault, + d_output.get() + ) + ); const auto h_output = d_output.load(); @@ -622,16 +627,18 @@ TEST(RocprimIntrinsicsTests, MaskedBitCount) } } - hipLaunchKernelGGL(masked_bit_count_kernel, - dim3(blocks), - dim3(block_size), - 0, - hipStreamDefault, - d_output.get(), - d_input.get(), - add, - active_lanes); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(masked_bit_count_kernel, + dim3(blocks), + dim3(block_size), + 0, + hipStreamDefault, + d_output.get(), + d_input.get(), + add, + active_lanes + ) + ); const auto output = d_output.load(); @@ -739,15 +746,17 @@ void warp_any_all_test() d_input.store(input); - hipLaunchKernelGGL(HIP_KERNEL_NAME(warp_any_all_kernel), - dim3(blocks), - dim3(block_size), - 0, - hipStreamDefault, - d_output.get(), - d_input.get(), - active_lanes); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(warp_any_all_kernel), + dim3(blocks), + dim3(block_size), + 0, + hipStreamDefault, + d_output.get(), + d_input.get(), + active_lanes + ) + ); const auto output = d_output.load(); @@ -878,17 +887,19 @@ TYPED_TEST(RocprimIntrinsicsTests, WarpPermute) d_input.store(input); d_indices.store(indices); - hipLaunchKernelGGL(HIP_KERNEL_NAME(warp_permute_kernel), - dim3(blocks), - dim3(block_size), - 0, - hipStreamDefault, - d_output.get(), - d_input.get(), - d_indices.get(), - logical_warp_size, - active_lanes); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(warp_permute_kernel), + dim3(blocks), + dim3(block_size), + 0, + hipStreamDefault, + d_output.get(), + d_input.get(), + d_indices.get(), + logical_warp_size, + active_lanes + ) + ); const auto output = d_output.load(); @@ -991,16 +1002,18 @@ TEST(RocprimIntrinsicsTests, MatchAny) d_input.store(input); - hipLaunchKernelGGL(HIP_KERNEL_NAME(match_any_kernel), - dim3(blocks), - dim3(block_size), - 0, - hipStreamDefault, - d_output.get(), - d_input.get(), - active_lanes, - lane_predicates); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(match_any_kernel), + dim3(blocks), + dim3(block_size), + 0, + hipStreamDefault, + d_output.get(), + d_input.get(), + active_lanes, + lane_predicates + ) + ); const auto output = d_output.load(); @@ -1080,15 +1093,17 @@ TEST(RocprimIntrinsicsTests, Ballot) d_input.store(input); - hipLaunchKernelGGL(ballot_kernel, - dim3(blocks), - dim3(block_size), - 0, - hipStreamDefault, - d_output.get(), - d_input.get(), - active_lanes); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(ballot_kernel, + dim3(blocks), + dim3(block_size), + 0, + hipStreamDefault, + d_output.get(), + d_input.get(), + active_lanes + ) + ); const auto output = d_output.load(); @@ -1179,15 +1194,17 @@ TEST(RocprimIntrinsicsTests, GroupElect) d_input.store(input); d_output.store(output); - hipLaunchKernelGGL(HIP_KERNEL_NAME(group_elect_kernel), - dim3(blocks), - dim3(block_size), - 0, - hipStreamDefault, - d_output.get(), - d_input.get(), - warps_per_block); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(group_elect_kernel), + dim3(blocks), + dim3(block_size), + 0, + hipStreamDefault, + d_output.get(), + d_input.get(), + warps_per_block + ) + ); output = d_output.load(); diff --git a/test/rocprim/test_lookback_reproducibility.cpp b/test/rocprim/test_lookback_reproducibility.cpp index fd58d3f08..ea0252243 100644 --- a/test/rocprim/test_lookback_reproducibility.cpp +++ b/test/rocprim/test_lookback_reproducibility.cpp @@ -204,7 +204,6 @@ TYPED_TEST(RocprimLookbackReproducibilityTests, Scan) test_scan_op, stream, debug_synchronous)); - HIP_CHECK(hipGetLastError()); std::vector output = d_output.load(); return output; @@ -292,7 +291,6 @@ TYPED_TEST(RocprimLookbackReproducibilityTests, ScanByKey) compare_op, stream, debug_synchronous)); - HIP_CHECK(hipGetLastError()); std::vector output = d_output.load(); return output; @@ -382,7 +380,6 @@ TYPED_TEST(RocprimLookbackReproducibilityTests, ReduceByKey) compare_op, stream, debug_synchronous)); - HIP_CHECK(hipGetLastError()); [[maybe_unused]] size_t unique_count_output = d_unique_count_output.load()[0]; diff --git a/test/rocprim/test_texture_cache_iterator.cpp b/test/rocprim/test_texture_cache_iterator.cpp index 1dc6a04eb..337ff4700 100644 --- a/test/rocprim/test_texture_cache_iterator.cpp +++ b/test/rocprim/test_texture_cache_iterator.cpp @@ -127,7 +127,6 @@ TYPED_TEST(RocprimTextureCacheIteratorTests, Transform) // Run HIP_CHECK( rocprim::transform(x, d_output.get(), size, transform(), stream, debug_synchronous)); - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Copy output to host diff --git a/test/rocprim/test_thread.cpp b/test/rocprim/test_thread.cpp index 504e6cc71..a72aa7040 100644 --- a/test/rocprim/test_thread.cpp +++ b/test/rocprim/test_thread.cpp @@ -115,14 +115,16 @@ TYPED_TEST(RocprimThreadTests, FlatBlockThreadID) common::device_ptr device_output(block_size); // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(flat_id_kernel), - dim3(1), - dim3(block_size_x, block_size_y, block_size_z), - 0, - 0, - device_output.get()); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(flat_id_kernel), + dim3(1), + dim3(block_size_x, block_size_y, block_size_z), + 0, + 0, + device_output.get() + ) + ); // Reading results from device output = device_output.load(); @@ -181,14 +183,16 @@ TYPED_TEST(RocprimThreadTests, FlatBlockID) // Preparing device common::device_ptr device_output(block_size); // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(block_id_kernel), - dim3(block_size_x, block_size_y, block_size_z), - dim3(block_size_x, block_size_y, block_size_z), - 0, - 0, - device_output.get()); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(block_id_kernel), + dim3(block_size_x, block_size_y, block_size_z), + dim3(block_size_x, block_size_y, block_size_z), + 0, + 0, + device_output.get() + ) + ); // Reading results from device output = device_output.load(); diff --git a/test/rocprim/test_thread_algos.cpp b/test/rocprim/test_thread_algos.cpp index d2e6644cc..3a67843c5 100644 --- a/test/rocprim/test_thread_algos.cpp +++ b/test/rocprim/test_thread_algos.cpp @@ -144,8 +144,17 @@ TYPED_TEST(RocprimThreadOperationTests, Load) common::device_ptr device_input(input); common::device_ptr device_output(input.size()); - thread_load_kernel<<>>(device_input.get(), device_output.get()); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(thread_load_kernel), + grid_size, + block_size, + 0, + 0, + device_input.get(), + device_output.get() + ) + ); // Reading results back output = device_output.load(); @@ -267,8 +276,17 @@ TYPED_TEST(RocprimThreadOperationTests, StoreNontemporal) common::device_ptr device_input(input); common::device_ptr device_output(input.size()); - thread_store_kernel<<>>(device_input.get(), device_output.get()); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(thread_store_kernel), + grid_size, + block_size, + 0, + 0, + device_input.get(), + device_output.get() + ) + ); // Reading results back output = device_output.load(); @@ -335,9 +353,17 @@ TYPED_TEST(RocprimThreadOperationTests, Reduction) common::device_ptr device_input(input); common::device_ptr device_output(input.size()); - thread_reduce_kernel - <<>>(device_input.get(), device_output.get()); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(thread_reduce_kernel), + grid_size, + block_size, + 0, + 0, + device_input.get(), + device_output.get() + ) + ); // Reading results back output = device_output.load(); @@ -401,9 +427,17 @@ TYPED_TEST(RocprimThreadOperationTests, Scan) common::device_ptr device_input(input); common::device_ptr device_output(input.size()); - thread_scan_kernel - <<>>(device_input.get(), device_output.get()); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(thread_scan_kernel), + grid_size, + block_size, + 0, + 0, + device_input.get(), + device_output.get() + ) + ); // Reading results back output = device_output.load(); @@ -507,25 +541,39 @@ void merge_path_search_test() common::device_ptr device_output_oob_x(1); common::device_ptr device_output_oob_y(1); - thread_search_kernel - <<>>(device_input1.get(), - device_input2.get(), - device_output_x.get(), - device_output_y.get(), - input1.size(), - input2.size(), - bin_op); - HIP_CHECK(hipGetLastError()); - - thread_search_out_of_bounds_kernel - <<>>(device_input1.get(), - device_input2.get(), - device_output_oob_x.get(), - device_output_oob_y.get(), - input1.size(), - input2.size(), - bin_op); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(thread_search_kernel), + grid_size, + block_size, + 0, + 0, + device_input1.get(), + device_input2.get(), + device_output_x.get(), + device_output_y.get(), + input1.size(), + input2.size(), + bin_op + ) + ); + + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(thread_search_out_of_bounds_kernel), + grid_size, + block_size, + 0, + 0, + device_input1.get(), + device_input2.get(), + device_output_oob_x.get(), + device_output_oob_y.get(), + input1.size(), + input2.size(), + bin_op + ) + ); // Reading results back output_x = device_output_x.load(); diff --git a/test/rocprim/test_transform_iterator.cpp b/test/rocprim/test_transform_iterator.cpp index 3b3c43f49..5a6154790 100644 --- a/test/rocprim/test_transform_iterator.cpp +++ b/test/rocprim/test_transform_iterator.cpp @@ -156,7 +156,7 @@ TYPED_TEST(RocprimTransformIteratorTests, TransformReduce) reduce_op, stream, TestFixture::debug_synchronous)); - HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); output = d_output.load(); diff --git a/test/rocprim/test_warp_exchange.cpp b/test/rocprim/test_warp_exchange.cpp index d0d2567c3..1600193a6 100644 --- a/test/rocprim/test_warp_exchange.cpp +++ b/test/rocprim/test_warp_exchange.cpp @@ -254,10 +254,17 @@ TYPED_TEST(WarpExchangeTest, WarpExchange) common::device_ptr d_output(items_count); HIP_CHECK(hipMemset(d_output.get(), 0, items_count * sizeof(T))); - warp_exchange_kernel - <<>>(d_input.get(), d_output.get()); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_exchange_kernel), + dim3(1), + dim3(block_size), + 0, + 0, + d_input.get(), + d_output.get() + ) + ); auto output = d_output.load(); @@ -300,10 +307,18 @@ TYPED_TEST(WarpExchangeTest, WarpExchangeNotInplace) common::device_ptr d_output(items_count); HIP_CHECK(hipMemset(d_output.get(), 0, items_count * sizeof(T))); - warp_exchange_kernel - <<>>(d_input.get(), d_output.get(), false); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_exchange_kernel), + dim3(1), + dim3(block_size), + 0, + 0, + d_input.get(), + d_output.get(), + false + ) + ); auto output = d_output.load(); @@ -408,10 +423,18 @@ TYPED_TEST(WarpExchangeScatterTest, WarpExchangeScatter) common::device_ptr d_output(items_count); HIP_CHECK(hipMemset(d_output.get(), 0, items_count * sizeof(T))); - warp_exchange_scatter_kernel - <<>>(d_input.get(), d_output.get(), d_ranks.get()); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_exchange_scatter_kernel), + dim3(1), + dim3(block_size), + 0, + 0, + d_input.get(), + d_output.get(), + d_ranks.get() + ) + ); auto output = d_output.load(); diff --git a/test/rocprim/test_warp_load.cpp b/test/rocprim/test_warp_load.cpp index 2dd6d9935..e1e05cef8 100644 --- a/test/rocprim/test_warp_load.cpp +++ b/test/rocprim/test_warp_load.cpp @@ -264,10 +264,17 @@ TYPED_TEST(WarpLoadTest, WarpLoad) common::device_ptr d_input(input); common::device_ptr d_output(items_count); - warp_load_kernel - <<>>(d_input.get(), d_output.get()); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_load_kernel), + dim3(1), + dim3(block_size), + 0, + 0, + d_input.get(), + d_output.get() + ) + ); std::vector output = d_output.load(); @@ -301,13 +308,19 @@ TYPED_TEST(WarpLoadTest, WarpLoadGuarded) common::device_ptr d_input(input); common::device_ptr d_output(items_count); - warp_load_guarded_kernel - <<>>(d_input.get(), - d_output.get(), - valid_items, - oob_default); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_load_guarded_kernel), + dim3(1), + dim3(block_size), + 0, + 0, + d_input.get(), + d_output.get(), + valid_items, + oob_default + ) + ); std::vector output = d_output.load(); diff --git a/test/rocprim/test_warp_reduce.hpp b/test/rocprim/test_warp_reduce.hpp index bb1746442..f1248903e 100644 --- a/test/rocprim/test_warp_reduce.hpp +++ b/test/rocprim/test_warp_reduce.hpp @@ -116,28 +116,33 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, ReduceSum) // Launching kernel if (current_device_warp_size == ws32) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_reduce_sum_kernel), - dim3(size / block_size_ws32), - dim3(block_size_ws32), - 0, - 0, - device_input.get(), - device_output.get()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_reduce_sum_kernel), + dim3(size / block_size_ws32), + dim3(block_size_ws32), + 0, + 0, + device_input.get(), + device_output.get() + ) + ); } else if (current_device_warp_size == ws64) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_reduce_sum_kernel), - dim3(size / block_size_ws64), - dim3(block_size_ws64), - 0, - 0, - device_input.get(), - device_output.get()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_reduce_sum_kernel), + dim3(size / block_size_ws64), + dim3(block_size_ws64), + 0, + 0, + device_input.get(), + device_output.get() + ) + ); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Read from device memory @@ -226,28 +231,33 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, AllReduceSum) // Launching kernel if (current_device_warp_size == ws32) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_allreduce_sum_kernel), - dim3(size / block_size_ws32), - dim3(block_size_ws32), - 0, - 0, - device_input.get(), - device_output.get()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_allreduce_sum_kernel), + dim3(size / block_size_ws32), + dim3(block_size_ws32), + 0, + 0, + device_input.get(), + device_output.get() + ) + ); } else if (current_device_warp_size == ws64) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_allreduce_sum_kernel), - dim3(size / block_size_ws64), - dim3(block_size_ws64), - 0, - 0, - device_input.get(), - device_output.get()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_allreduce_sum_kernel), + dim3(size / block_size_ws64), + dim3(block_size_ws64), + 0, + 0, + device_input.get(), + device_output.get() + ) + ); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Read from device memory @@ -333,30 +343,35 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, ReduceSumValid) // Launching kernel if (current_device_warp_size == ws32) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_reduce_sum_kernel), - dim3(size / block_size_ws32), - dim3(block_size_ws32), - 0, - 0, - device_input.get(), - device_output.get(), - valid); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_reduce_sum_kernel), + dim3(size / block_size_ws32), + dim3(block_size_ws32), + 0, + 0, + device_input.get(), + device_output.get(), + valid + ) + ); } else if (current_device_warp_size == ws64) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_reduce_sum_kernel), - dim3(size / block_size_ws64), - dim3(block_size_ws64), - 0, - 0, - device_input.get(), - device_output.get(), - valid); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_reduce_sum_kernel), + dim3(size / block_size_ws64), + dim3(block_size_ws64), + 0, + 0, + device_input.get(), + device_output.get(), + valid + ) + ); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Read from device memory @@ -447,30 +462,35 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, AllReduceSumValid) // Launching kernel if (current_device_warp_size == ws32) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_allreduce_sum_kernel), - dim3(size / block_size_ws32), - dim3(block_size_ws32), - 0, - 0, - device_input.get(), - device_output.get(), - valid); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_allreduce_sum_kernel), + dim3(size / block_size_ws32), + dim3(block_size_ws32), + 0, + 0, + device_input.get(), + device_output.get(), + valid + ) + ); } else if (current_device_warp_size == ws64) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_allreduce_sum_kernel), - dim3(size / block_size_ws64), - dim3(block_size_ws64), - 0, - 0, - device_input.get(), - device_output.get(), - valid); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_allreduce_sum_kernel), + dim3(size / block_size_ws64), + dim3(block_size_ws64), + 0, + 0, + device_input.get(), + device_output.get(), + valid + ) + ); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Read from device memory @@ -566,28 +586,33 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, ReduceCustomStruct) // Launching kernel if (current_device_warp_size == ws32) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_reduce_sum_kernel), - dim3(size / block_size_ws32), - dim3(block_size_ws32), - 0, - 0, - device_input.get(), - device_output.get()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_reduce_sum_kernel), + dim3(size / block_size_ws32), + dim3(block_size_ws32), + 0, + 0, + device_input.get(), + device_output.get() + ) + ); } else if (current_device_warp_size == ws64) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_reduce_sum_kernel), - dim3(size / block_size_ws64), - dim3(block_size_ws64), - 0, - 0, - device_input.get(), - device_output.get()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_reduce_sum_kernel), + dim3(size / block_size_ws64), + dim3(block_size_ws64), + 0, + 0, + device_input.get(), + device_output.get() + ) + ); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Read from device memory @@ -688,36 +713,41 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, HeadSegmentedReduceSum) // Launching kernel if (current_device_warp_size == ws32) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(head_segmented_warp_reduce_kernel), - dim3(size / block_size_ws32), - dim3(block_size_ws32), - 0, - 0, - device_input.get(), - device_flags.get(), - device_output.get()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(head_segmented_warp_reduce_kernel), + dim3(size / block_size_ws32), + dim3(block_size_ws32), + 0, + 0, + device_input.get(), + device_flags.get(), + device_output.get() + ) + ); } else if (current_device_warp_size == ws64) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(head_segmented_warp_reduce_kernel), - dim3(size / block_size_ws64), - dim3(block_size_ws64), - 0, - 0, - device_input.get(), - device_flags.get(), - device_output.get()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(head_segmented_warp_reduce_kernel), + dim3(size / block_size_ws64), + dim3(block_size_ws64), + 0, + 0, + device_input.get(), + device_flags.get(), + device_output.get() + ) + ); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Read from device memory @@ -839,36 +869,41 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, TailSegmentedReduceSum) // Launching kernel if (current_device_warp_size == ws32) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(tail_segmented_warp_reduce_kernel), - dim3(size / block_size_ws32), - dim3(block_size_ws32), - 0, - 0, - device_input.get(), - device_flags.get(), - device_output.get()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(tail_segmented_warp_reduce_kernel), + dim3(size / block_size_ws32), + dim3(block_size_ws32), + 0, + 0, + device_input.get(), + device_flags.get(), + device_output.get() + ) + ); } else if (current_device_warp_size == ws64) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(tail_segmented_warp_reduce_kernel), - dim3(size / block_size_ws64), - dim3(block_size_ws64), - 0, - 0, - device_input.get(), - device_flags.get(), - device_output.get()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(tail_segmented_warp_reduce_kernel), + dim3(size / block_size_ws64), + dim3(block_size_ws64), + 0, + 0, + device_input.get(), + device_flags.get(), + device_output.get() + ) + ); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Read from device memory diff --git a/test/rocprim/test_warp_scan.hpp b/test/rocprim/test_warp_scan.hpp index e4bb9be1b..9546937ca 100644 --- a/test/rocprim/test_warp_scan.hpp +++ b/test/rocprim/test_warp_scan.hpp @@ -117,28 +117,33 @@ typed_test_def(RocprimWarpScanTests, name_suffix, InclusiveScan) // Launching kernel if (current_device_warp_size == ws32) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_inclusive_scan_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_input.get(), - device_output.get()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_inclusive_scan_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_input.get(), + device_output.get() + ) + ); } else if (current_device_warp_size == ws64) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_inclusive_scan_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - device_input.get(), - device_output.get()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_inclusive_scan_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_input.get(), + device_output.get() + ) + ); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Read from device memory @@ -351,32 +356,37 @@ typed_test_def(RocprimWarpScanTests, name_suffix, InclusiveScanReduce) // Launching kernel if (current_device_warp_size == ws32) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - warp_inclusive_scan_reduce_kernel), - dim3(grid_size), - dim3(block_size_ws32), - 0, - 0, - device_input.get(), - device_output.get(), - device_output_reductions.get()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + warp_inclusive_scan_reduce_kernel), + dim3(grid_size), + dim3(block_size_ws32), + 0, + 0, + device_input.get(), + device_output.get(), + device_output_reductions.get() + ) + ); } else if(current_device_warp_size == ws64) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - warp_inclusive_scan_reduce_kernel), - dim3(grid_size), - dim3(block_size_ws64), - 0, - 0, - device_input.get(), - device_output.get(), - device_output_reductions.get()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + warp_inclusive_scan_reduce_kernel), + dim3(grid_size), + dim3(block_size_ws64), + 0, + 0, + device_input.get(), + device_output.get(), + device_output_reductions.get() + ) + ); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Read from device memory @@ -601,30 +611,35 @@ typed_test_def(RocprimWarpScanTests, name_suffix, ExclusiveScan) // Launching kernel if (current_device_warp_size == ws32) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_exclusive_scan_kernel), - dim3(grid_size), - dim3(block_size_ws32), - 0, - 0, - device_input.get(), - device_output.get(), - init); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_exclusive_scan_kernel), + dim3(grid_size), + dim3(block_size_ws32), + 0, + 0, + device_input.get(), + device_output.get(), + init + ) + ); } else if (current_device_warp_size == ws64) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_exclusive_scan_kernel), - dim3(grid_size), - dim3(block_size_ws64), - 0, - 0, - device_input.get(), - device_output.get(), - init); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_exclusive_scan_kernel), + dim3(grid_size), + dim3(block_size_ws64), + 0, + 0, + device_input.get(), + device_output.get(), + init + ) + ); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Read from device memory @@ -832,30 +847,35 @@ typed_test_def(RocprimWarpScanTests, name_suffix, ExclusiveScanWoInit) // Launching kernel if(current_device_warp_size == ws32) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - warp_exclusive_scan_wo_init_kernel), - dim3(grid_size), - dim3(block_size_ws32), - 0, - 0, - device_input.get(), - device_output.get()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + warp_exclusive_scan_wo_init_kernel), + dim3(grid_size), + dim3(block_size_ws32), + 0, + 0, + device_input.get(), + device_output.get() + ) + ); } else if(current_device_warp_size == ws64) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - warp_exclusive_scan_wo_init_kernel), - dim3(grid_size), - dim3(block_size_ws64), - 0, - 0, - device_input.get(), - device_output.get()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + warp_exclusive_scan_wo_init_kernel), + dim3(grid_size), + dim3(block_size_ws64), + 0, + 0, + device_input.get(), + device_output.get() + ) + ); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Read from device memory @@ -963,33 +983,38 @@ typed_test_def(RocprimWarpScanTests, name_suffix, ExclusiveReduceScan) // Launching kernel if (current_device_warp_size == ws32) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - warp_exclusive_scan_reduce_kernel), - dim3(grid_size), - dim3(block_size_ws32), - 0, - 0, - device_input.get(), - device_output.get(), - device_output_reductions.get(), - init); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + warp_exclusive_scan_reduce_kernel), + dim3(grid_size), + dim3(block_size_ws32), + 0, + 0, + device_input.get(), + device_output.get(), + device_output_reductions.get(), + init + ) + ); } else if (current_device_warp_size == ws64) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - warp_exclusive_scan_reduce_kernel), - dim3(grid_size), - dim3(block_size_ws64), - 0, - 0, - device_input.get(), - device_output.get(), - device_output_reductions.get(), - init); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + warp_exclusive_scan_reduce_kernel), + dim3(grid_size), + dim3(block_size_ws64), + 0, + 0, + device_input.get(), + device_output.get(), + device_output_reductions.get(), + init + ) + ); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Read from device memory @@ -1102,33 +1127,38 @@ typed_test_def(RocprimWarpScanTests, name_suffix, ExclusiveReduceScanWoInit) // Launching kernel if(current_device_warp_size == ws32) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_exclusive_scan_reduce_wo_init_kernel), - dim3(grid_size), - dim3(block_size_ws32), - 0, - 0, - device_input.get(), - device_output.get(), - device_output_reductions.get()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_exclusive_scan_reduce_wo_init_kernel), + dim3(grid_size), + dim3(block_size_ws32), + 0, + 0, + device_input.get(), + device_output.get(), + device_output_reductions.get() + ) + ); } else if(current_device_warp_size == ws64) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_exclusive_scan_reduce_wo_init_kernel), - dim3(grid_size), - dim3(block_size_ws64), - 0, - 0, - device_input.get(), - device_output.get(), - device_output_reductions.get()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_exclusive_scan_reduce_wo_init_kernel), + dim3(grid_size), + dim3(block_size_ws64), + 0, + 0, + device_input.get(), + device_output.get(), + device_output_reductions.get() + ) + ); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Read from device memory @@ -1237,32 +1267,37 @@ typed_test_def(RocprimWarpScanTests, name_suffix, Scan) // Launching kernel if (current_device_warp_size == ws32) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_scan_kernel), - dim3(grid_size), - dim3(block_size_ws32), - 0, - 0, - device_input.get(), - device_inclusive_output.get(), - device_exclusive_output.get(), - init); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_scan_kernel), + dim3(grid_size), + dim3(block_size_ws32), + 0, + 0, + device_input.get(), + device_inclusive_output.get(), + device_exclusive_output.get(), + init + ) + ); } else if (current_device_warp_size == ws64) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_scan_kernel), - dim3(grid_size), - dim3(block_size_ws64), - 0, - 0, - device_input.get(), - device_inclusive_output.get(), - device_exclusive_output.get(), - init); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_scan_kernel), + dim3(grid_size), + dim3(block_size_ws64), + 0, + 0, + device_input.get(), + device_inclusive_output.get(), + device_exclusive_output.get(), + init + ) + ); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Read from device memory @@ -1370,34 +1405,39 @@ typed_test_def(RocprimWarpScanTests, name_suffix, ScanReduce) // Launching kernel if (current_device_warp_size == ws32) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_scan_reduce_kernel), - dim3(grid_size), - dim3(block_size_ws32), - 0, - 0, - device_input.get(), - device_inclusive_output.get(), - device_exclusive_output.get(), - device_output_reductions.get(), - init); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_scan_reduce_kernel), + dim3(grid_size), + dim3(block_size_ws32), + 0, + 0, + device_input.get(), + device_inclusive_output.get(), + device_exclusive_output.get(), + device_output_reductions.get(), + init + ) + ); } else if (current_device_warp_size == ws64) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_scan_reduce_kernel), - dim3(grid_size), - dim3(block_size_ws64), - 0, - 0, - device_input.get(), - device_inclusive_output.get(), - device_exclusive_output.get(), - device_output_reductions.get(), - init); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_scan_reduce_kernel), + dim3(grid_size), + dim3(block_size_ws64), + 0, + 0, + device_input.get(), + device_inclusive_output.get(), + device_exclusive_output.get(), + device_output_reductions.get(), + init + ) + ); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Read from device memory @@ -1506,28 +1546,33 @@ typed_test_def(RocprimWarpScanTests, name_suffix, InclusiveScanCustomType) // Launching kernel if (current_device_warp_size == ws32) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_inclusive_scan_kernel), - dim3(grid_size), - dim3(block_size_ws32), - 0, - 0, - device_input.get(), - device_output.get()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_inclusive_scan_kernel), + dim3(grid_size), + dim3(block_size_ws32), + 0, + 0, + device_input.get(), + device_output.get() + ) + ); } else if (current_device_warp_size == ws64) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_inclusive_scan_kernel), - dim3(grid_size), - dim3(block_size_ws64), - 0, - 0, - device_input.get(), - device_output.get()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_inclusive_scan_kernel), + dim3(grid_size), + dim3(block_size_ws64), + 0, + 0, + device_input.get(), + device_output.get() + ) + ); } - HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Read from device memory diff --git a/test/rocprim/test_warp_sort.hpp b/test/rocprim/test_warp_sort.hpp index 8c1f32fb5..864e2bab7 100644 --- a/test/rocprim/test_warp_sort.hpp +++ b/test/rocprim/test_warp_sort.hpp @@ -100,16 +100,16 @@ typed_test_def(RocprimWarpSortShuffleBasedTests, name_suffix, Sort) common::device_ptr d_output(output); // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(test_hip_warp_sort), - dim3(grid_size), - dim3(block_size), - 0, - 0, - d_output.get()); - - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(test_hip_warp_sort), + dim3(grid_size), + dim3(block_size), + 0, + 0, + d_output.get() + ) + ); // Read from device memory output = d_output.load(); @@ -190,20 +190,20 @@ typed_test_def(RocprimWarpSortShuffleBasedTests, name_suffix, SortKeyInt) common::device_ptr d_output_value(output_value); // Launching kernel - hipLaunchKernelGGL(HIP_KERNEL_NAME(test_hip_sort_key_value_kernel), - dim3(grid_size), - dim3(block_size), - 0, - 0, - d_output_key.get(), - d_output_value.get()); - - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(test_hip_sort_key_value_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + d_output_key.get(), + d_output_value.get() + ) + ); // Read from device memory output_key = d_output_key.load(); diff --git a/test/rocprim/test_warp_store.cpp b/test/rocprim/test_warp_store.cpp index f28b527e7..e9af6a476 100644 --- a/test/rocprim/test_warp_store.cpp +++ b/test/rocprim/test_warp_store.cpp @@ -251,10 +251,17 @@ TYPED_TEST(WarpStoreTest, WarpLoad) common::device_ptr d_input(input); common::device_ptr d_output(items_count); - warp_store_kernel - <<>>(d_input.get(), d_output.get()); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_store_kernel), + dim3(1), + dim3(block_size), + 0, + 0, + d_input.get(), + d_output.get() + ) + ); std::vector output = d_output.load(); @@ -287,10 +294,18 @@ TYPED_TEST(WarpStoreTest, WarpStoreGuarded) common::device_ptr d_output(items_count); HIP_CHECK(hipMemset(d_output.get(), 0, items_count * sizeof(T))); - warp_store_guarded_kernel - <<>>(d_input.get(), d_output.get(), valid_items); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_store_guarded_kernel), + dim3(1), + dim3(block_size), + 0, + 0, + d_input.get(), + d_output.get(), + valid_items + ) + ); std::vector output = d_output.load(); From 4fafa0a17b8071ccd81f2c3f807dcbe8f6fc6727 Mon Sep 17 00:00:00 2001 From: Wayne Franz Date: Fri, 28 Feb 2025 13:07:59 -0500 Subject: [PATCH 2/5] Clear HIP error in public device API functions The behaviour of hipGetLastError will change in the future. With the changes, the error it records will only be cleared on each call to hipGetLastError. Call hipGetLastError at the beginning of public device API functions, since they may call hipGetLastError internally, and we don't want that call to report an error that happened before the function was invoked. --- .../device/device_adjacent_difference.hpp | 18 ++++ .../rocprim/device/device_adjacent_find.hpp | 6 ++ .../rocprim/device/device_binary_search.hpp | 12 +++ .../include/rocprim/device/device_copy.hpp | 3 + .../rocprim/device/device_find_end.hpp | 3 + .../rocprim/device/device_find_first_of.hpp | 3 + .../rocprim/device/device_histogram.hpp | 24 +++++ .../include/rocprim/device/device_memcpy.hpp | 3 + .../include/rocprim/device/device_merge.hpp | 6 ++ .../rocprim/device/device_merge_sort.hpp | 6 ++ .../rocprim/device/device_nth_element.hpp | 6 ++ .../rocprim/device/device_partial_sort.hpp | 6 ++ .../rocprim/device/device_partition.hpp | 15 +++ .../rocprim/device/device_radix_sort.hpp | 96 +++++++++++++++++++ .../include/rocprim/device/device_reduce.hpp | 6 ++ .../rocprim/device/device_reduce_by_key.hpp | 6 ++ .../device/device_run_length_encode.hpp | 3 + .../include/rocprim/device/device_scan.hpp | 12 +++ .../rocprim/device/device_scan_by_key.hpp | 12 +++ .../include/rocprim/device/device_search.hpp | 3 + .../rocprim/device/device_search_n.hpp | 3 + .../device/device_segmented_radix_sort.hpp | 24 +++++ .../device/device_segmented_reduce.hpp | 3 + .../rocprim/device/device_segmented_scan.hpp | 12 +++ .../include/rocprim/device/device_select.hpp | 15 +++ .../rocprim/device/device_transform.hpp | 6 ++ 26 files changed, 312 insertions(+) diff --git a/rocprim/include/rocprim/device/device_adjacent_difference.hpp b/rocprim/include/rocprim/device/device_adjacent_difference.hpp index dc3a0696b..dd309c4ee 100644 --- a/rocprim/include/rocprim/device/device_adjacent_difference.hpp +++ b/rocprim/include/rocprim/device/device_adjacent_difference.hpp @@ -310,6 +310,9 @@ hipError_t adjacent_difference(void* const temporary_storage, const hipStream_t stream = 0, const bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + static constexpr bool in_place = false; static constexpr bool right = false; return detail::adjacent_difference_impl( @@ -361,6 +364,9 @@ hipError_t adjacent_difference_inplace(void* const temporary_storage, const hipStream_t stream = 0, const bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + static constexpr bool in_place = true; static constexpr bool right = false; return detail::adjacent_difference_impl( @@ -413,6 +419,9 @@ hipError_t adjacent_difference_inplace(void* const temporary_storage, const hipStream_t stream = 0, const bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + static constexpr bool in_place = true; static constexpr bool right = false; return detail::adjacent_difference_impl(temporary_storage, @@ -515,6 +524,9 @@ hipError_t adjacent_difference_right(void* const temporary_storage, const hipStream_t stream = 0, const bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + static constexpr bool in_place = false; static constexpr bool right = true; return detail::adjacent_difference_impl( @@ -566,6 +578,9 @@ hipError_t adjacent_difference_right_inplace(void* const temporary_stor const hipStream_t stream = 0, const bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + static constexpr bool in_place = true; static constexpr bool right = true; return detail::adjacent_difference_impl( @@ -617,6 +632,9 @@ hipError_t adjacent_difference_right_inplace(void* const temporary_stor const hipStream_t stream = 0, const bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + static constexpr bool in_place = true; static constexpr bool right = true; return detail::adjacent_difference_impl(temporary_storage, diff --git a/rocprim/include/rocprim/device/device_adjacent_find.hpp b/rocprim/include/rocprim/device/device_adjacent_find.hpp index 1ed0fc4bb..cca9a3dd8 100644 --- a/rocprim/include/rocprim/device/device_adjacent_find.hpp +++ b/rocprim/include/rocprim/device/device_adjacent_find.hpp @@ -56,6 +56,9 @@ hipError_t adjacent_find_impl(void* const temporary_storage, const hipStream_t stream, const bool debug_synchronous) { + // Clear any existing error + (void) hipGetLastError(); + // Data types using input_type = typename std::iterator_traits::value_type; using op_result_type = bool; @@ -292,6 +295,9 @@ hipError_t adjacent_find(void* const temporary_storage, const hipStream_t stream = 0, const bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + return detail::adjacent_find_impl(temporary_storage, storage_size, input, diff --git a/rocprim/include/rocprim/device/device_binary_search.hpp b/rocprim/include/rocprim/device/device_binary_search.hpp index 31c73ab28..dfe148b04 100644 --- a/rocprim/include/rocprim/device/device_binary_search.hpp +++ b/rocprim/include/rocprim/device/device_binary_search.hpp @@ -212,6 +212,9 @@ hipError_t lower_bound(void * temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + static_assert(detail::is_default_or_has_tag::value, "Config must be a specialization of struct template lower_bound_config"); @@ -349,6 +352,12 @@ hipError_t upper_bound(void * temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + + // Clear any existing error + (void) hipGetLastError(); + static_assert(detail::is_default_or_has_tag::value, "Config must be a specialization of struct template upper_bound_config"); using value_type = typename std::iterator_traits::value_type; @@ -480,6 +489,9 @@ hipError_t binary_search(void * temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + static_assert(detail::is_default_or_has_tag::value, "Config must be a specialization of struct template binary_search_config"); using value_type = typename std::iterator_traits::value_type; diff --git a/rocprim/include/rocprim/device/device_copy.hpp b/rocprim/include/rocprim/device/device_copy.hpp index 57c526907..ccf8b1de1 100644 --- a/rocprim/include/rocprim/device/device_copy.hpp +++ b/rocprim/include/rocprim/device/device_copy.hpp @@ -133,6 +133,9 @@ ROCPRIM_INLINE static hipError_t batch_copy(void* temporary_storage hipStream_t stream = hipStreamDefault, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + return detail:: batch_memcpy_func( temporary_storage, diff --git a/rocprim/include/rocprim/device/device_find_end.hpp b/rocprim/include/rocprim/device/device_find_end.hpp index 1e35f3426..491434c00 100644 --- a/rocprim/include/rocprim/device/device_find_end.hpp +++ b/rocprim/include/rocprim/device/device_find_end.hpp @@ -135,6 +135,9 @@ hipError_t find_end(void* temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + return detail::search_impl(temporary_storage, storage_size, input, diff --git a/rocprim/include/rocprim/device/device_find_first_of.hpp b/rocprim/include/rocprim/device/device_find_first_of.hpp index c77142426..49ce1cdf8 100644 --- a/rocprim/include/rocprim/device/device_find_first_of.hpp +++ b/rocprim/include/rocprim/device/device_find_first_of.hpp @@ -366,6 +366,9 @@ hipError_t find_first_of(void* temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + return detail::find_first_of_impl(temporary_storage, storage_size, input, diff --git a/rocprim/include/rocprim/device/device_histogram.hpp b/rocprim/include/rocprim/device/device_histogram.hpp index 6f9fe0f7d..daced544f 100644 --- a/rocprim/include/rocprim/device/device_histogram.hpp +++ b/rocprim/include/rocprim/device/device_histogram.hpp @@ -502,6 +502,9 @@ inline hipError_t histogram_even(void* temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + Counter* histogram_single[1] = {histogram}; unsigned int levels_single[1] = {levels}; Level lower_level_single[1] = {lower_level}; @@ -611,6 +614,9 @@ inline hipError_t histogram_even(void* temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + Counter* histogram_single[1] = {histogram}; unsigned int levels_single[1] = {levels}; Level lower_level_single[1] = {lower_level}; @@ -731,6 +737,9 @@ inline hipError_t multi_histogram_even(void* temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + return detail::histogram_even_impl(temporary_storage, storage_size, samples, @@ -856,6 +865,9 @@ inline hipError_t multi_histogram_even(void* temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + return detail::histogram_even_impl(temporary_storage, storage_size, samples, @@ -946,6 +958,9 @@ inline hipError_t histogram_range(void* temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + Counter* histogram_single[1] = {histogram}; unsigned int levels_single[1] = {levels}; Level* level_values_single[1] = {level_values}; @@ -1049,6 +1064,9 @@ inline hipError_t histogram_range(void* temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + Counter* histogram_single[1] = {histogram}; unsigned int levels_single[1] = {levels}; Level* level_values_single[1] = {level_values}; @@ -1162,6 +1180,9 @@ inline hipError_t multi_histogram_range(void* temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + return detail::histogram_range_impl(temporary_storage, storage_size, samples, @@ -1282,6 +1303,9 @@ inline hipError_t multi_histogram_range(void* temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + return detail::histogram_range_impl(temporary_storage, storage_size, samples, diff --git a/rocprim/include/rocprim/device/device_memcpy.hpp b/rocprim/include/rocprim/device/device_memcpy.hpp index e696a5165..c8af965eb 100644 --- a/rocprim/include/rocprim/device/device_memcpy.hpp +++ b/rocprim/include/rocprim/device/device_memcpy.hpp @@ -130,6 +130,9 @@ ROCPRIM_INLINE static hipError_t batch_memcpy(void* temporary_stora hipStream_t stream = hipStreamDefault, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + return detail:: batch_memcpy_func( temporary_storage, diff --git a/rocprim/include/rocprim/device/device_merge.hpp b/rocprim/include/rocprim/device/device_merge.hpp index e8fc0d2a2..3faa7092f 100644 --- a/rocprim/include/rocprim/device/device_merge.hpp +++ b/rocprim/include/rocprim/device/device_merge.hpp @@ -303,6 +303,9 @@ hipError_t merge(void * temporary_storage, const hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + empty_type * values = nullptr; return detail::merge_impl( temporary_storage, storage_size, @@ -430,6 +433,9 @@ hipError_t merge(void * temporary_storage, const hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + return detail::merge_impl( temporary_storage, storage_size, keys_input1, keys_input2, keys_output, diff --git a/rocprim/include/rocprim/device/device_merge_sort.hpp b/rocprim/include/rocprim/device/device_merge_sort.hpp index 82a2493c0..1f347b86c 100644 --- a/rocprim/include/rocprim/device/device_merge_sort.hpp +++ b/rocprim/include/rocprim/device/device_merge_sort.hpp @@ -930,6 +930,9 @@ hipError_t merge_sort(void * temporary_storage, const hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + empty_type * values = nullptr; return detail::merge_sort_impl( temporary_storage, storage_size, @@ -1044,6 +1047,9 @@ hipError_t merge_sort(void * temporary_storage, const hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + return detail::merge_sort_impl( temporary_storage, storage_size, keys_input, keys_output, values_input, values_output, size, diff --git a/rocprim/include/rocprim/device/device_nth_element.hpp b/rocprim/include/rocprim/device/device_nth_element.hpp index 3f07dc22b..7a3ac129b 100644 --- a/rocprim/include/rocprim/device/device_nth_element.hpp +++ b/rocprim/include/rocprim/device/device_nth_element.hpp @@ -258,6 +258,9 @@ hipError_t nth_element(void* temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + return detail::nth_element_impl(temporary_storage, storage_size, keys, @@ -370,6 +373,9 @@ hipError_t nth_element(void* temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + using key_type = typename std::iterator_traits::value_type; static_assert( std::is_same::value_type; static_assert( std::is_same( temporary_storage, diff --git a/rocprim/include/rocprim/device/device_partition.hpp b/rocprim/include/rocprim/device/device_partition.hpp index e7e8d6d3f..f18aa9602 100644 --- a/rocprim/include/rocprim/device/device_partition.hpp +++ b/rocprim/include/rocprim/device/device_partition.hpp @@ -527,6 +527,9 @@ inline hipError_t partition_two_way(void* temporary_storag const hipStream_t stream = 0, const bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + using flag_type = ::rocprim::empty_type; //dummy using inequality_op_type = ::rocprim::empty_type; //dummy using offset_type = unsigned int; @@ -667,6 +670,9 @@ inline hipError_t partition_two_way(void* temporary_storag const hipStream_t stream = 0, const bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + using unary_predicate_type = ::rocprim::empty_type; // dummy using inequality_op_type = ::rocprim::empty_type; // dummy using offset_type = unsigned int; @@ -792,6 +798,9 @@ hipError_t partition(void * temporary_storage, const hipStream_t stream = 0, const bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + using unary_predicate_type = ::rocprim::empty_type; // dummy using inequality_op_type = ::rocprim::empty_type; // dummy using offset_type = unsigned int; @@ -923,6 +932,9 @@ hipError_t partition(void * temporary_storage, const hipStream_t stream = 0, const bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + using flag_type = ::rocprim::empty_type; //dummy using inequality_op_type = ::rocprim::empty_type; //dummy using offset_type = unsigned int; @@ -1102,6 +1114,9 @@ hipError_t partition_three_way(void * temporary_storage, const hipStream_t stream = 0, const bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + // Dummy flag type using flag_type = ::rocprim::empty_type; flag_type * flags = nullptr; diff --git a/rocprim/include/rocprim/device/device_radix_sort.hpp b/rocprim/include/rocprim/device/device_radix_sort.hpp index 04746e8fa..68c6c84e5 100644 --- a/rocprim/include/rocprim/device/device_radix_sort.hpp +++ b/rocprim/include/rocprim/device/device_radix_sort.hpp @@ -850,6 +850,10 @@ hipError_t radix_sort_keys(void* temporary_storage, bool debug_synchronous = false) { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + empty_type * values = nullptr; bool ignored; return detail::radix_sort_impl(temporary_storage, @@ -967,6 +971,10 @@ hipError_t radix_sort_keys(void* temporary_storage, bool debug_synchronous = false) { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + empty_type * values = nullptr; bool is_result_in_output; hipError_t error = detail::radix_sort_impl(temporary_storage, @@ -1117,6 +1125,10 @@ auto radix_sort_keys(void* temporary_storage, -> std::enable_if_t::value, hipError_t> { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + empty_type* values = nullptr; bool ignored; return detail::radix_sort_impl(temporary_storage, @@ -1249,6 +1261,10 @@ auto radix_sort_keys(void* temporary_storage, -> std::enable_if_t::value, hipError_t> { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + empty_type* values = nullptr; bool ignored; return detail::radix_sort_impl( @@ -1392,6 +1408,10 @@ auto radix_sort_keys(void* temporary_storage, -> std::enable_if_t::value, hipError_t> { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + empty_type* values = nullptr; bool is_result_in_output; hipError_t error = detail::radix_sort_impl(temporary_storage, @@ -1526,6 +1546,10 @@ auto radix_sort_keys(void* temporary_storage, -> std::enable_if_t::value, hipError_t> { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + empty_type* values = nullptr; bool is_result_in_output; hipError_t error = detail::radix_sort_impl( @@ -1651,6 +1675,10 @@ hipError_t radix_sort_keys_desc(void* temporary_storage, bool debug_synchronous = false) { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + empty_type* values = nullptr; bool ignored; return detail::radix_sort_impl(temporary_storage, @@ -1768,6 +1796,10 @@ hipError_t radix_sort_keys_desc(void* temporary_storage, bool debug_synchronous = false) { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + empty_type* values = nullptr; bool is_result_in_output; hipError_t error = detail::radix_sort_impl(temporary_storage, @@ -1918,6 +1950,10 @@ auto radix_sort_keys_desc(void* temporary_storage, -> std::enable_if_t::value, hipError_t> { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + empty_type* values = nullptr; bool ignored; return detail::radix_sort_impl(temporary_storage, @@ -2050,6 +2086,10 @@ auto radix_sort_keys_desc(void* temporary_storage, -> std::enable_if_t::value, hipError_t> { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + empty_type* values = nullptr; bool ignored; return detail::radix_sort_impl( @@ -2193,6 +2233,10 @@ auto radix_sort_keys_desc(void* temporary_storage, -> std::enable_if_t::value, hipError_t> { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + empty_type* values = nullptr; bool is_result_in_output; hipError_t error = detail::radix_sort_impl(temporary_storage, @@ -2327,6 +2371,10 @@ auto radix_sort_keys_desc(void* temporary_storage, -> std::enable_if_t::value, hipError_t> { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + empty_type* values = nullptr; bool is_result_in_output; hipError_t error @@ -2471,6 +2519,10 @@ hipError_t radix_sort_pairs(void* temporary_storage, bool debug_synchronous = false) { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + bool ignored; return detail::radix_sort_impl(temporary_storage, storage_size, @@ -2601,6 +2653,10 @@ hipError_t radix_sort_pairs(void* temporary_storage, bool debug_synchronous = false) { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + bool is_result_in_output; hipError_t error = detail::radix_sort_impl(temporary_storage, storage_size, @@ -2770,6 +2826,10 @@ auto radix_sort_pairs(void* temporary_storage, -> std::enable_if_t::value, hipError_t> { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + bool ignored; return detail::radix_sort_impl(temporary_storage, storage_size, @@ -2916,6 +2976,10 @@ auto radix_sort_pairs(void* temporary_storage, -> std::enable_if_t::value, hipError_t> { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + bool ignored; return detail::radix_sort_impl( temporary_storage, @@ -3070,6 +3134,10 @@ auto radix_sort_pairs(void* temporary_storage, -> std::enable_if_t::value, hipError_t> { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + bool is_result_in_output; hipError_t error = detail::radix_sort_impl(temporary_storage, storage_size, @@ -3212,6 +3280,10 @@ auto radix_sort_pairs(void* temporary_storage, -> std::enable_if_t::value, hipError_t> { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + bool is_result_in_output; hipError_t error = detail::radix_sort_impl( temporary_storage, @@ -3352,6 +3424,10 @@ hipError_t radix_sort_pairs_desc(void* temporary_storage, bool debug_synchronous = false) { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + bool ignored; return detail::radix_sort_impl(temporary_storage, storage_size, @@ -3476,6 +3552,10 @@ hipError_t radix_sort_pairs_desc(void* temporary_storage, bool debug_synchronous = false) { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + bool is_result_in_output; hipError_t error = detail::radix_sort_impl(temporary_storage, storage_size, @@ -3645,6 +3725,10 @@ auto radix_sort_pairs_desc(void* temporary_storage, -> std::enable_if_t::value, hipError_t> { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + bool ignored; return detail::radix_sort_impl(temporary_storage, storage_size, @@ -3791,6 +3875,10 @@ auto radix_sort_pairs_desc(void* temporary_storage, -> std::enable_if_t::value, hipError_t> { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + bool ignored; return detail::radix_sort_impl( temporary_storage, @@ -3945,6 +4033,10 @@ auto radix_sort_pairs_desc(void* temporary_storage, -> std::enable_if_t::value, hipError_t> { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + bool is_result_in_output; hipError_t error = detail::radix_sort_impl(temporary_storage, storage_size, @@ -4087,6 +4179,10 @@ auto radix_sort_pairs_desc(void* temporary_storage, -> std::enable_if_t::value, hipError_t> { static_assert(std::is_integral::value, "Size must be an integral type."); + + // Clear any existing error + (void) hipGetLastError(); + bool is_result_in_output; hipError_t error = detail::radix_sort_impl(temporary_storage, diff --git a/rocprim/include/rocprim/device/device_reduce.hpp b/rocprim/include/rocprim/device/device_reduce.hpp index 65e17563c..862c922b7 100644 --- a/rocprim/include/rocprim/device/device_reduce.hpp +++ b/rocprim/include/rocprim/device/device_reduce.hpp @@ -429,6 +429,9 @@ inline hipError_t reduce(void* temporary_storage, const hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + return detail::reduce_impl(temporary_storage, storage_size, input, @@ -559,6 +562,9 @@ inline hipError_t reduce(void* temporary_storage, const hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + using input_type = typename std::iterator_traits::value_type; return detail::reduce_impl(temporary_storage, diff --git a/rocprim/include/rocprim/device/device_reduce_by_key.hpp b/rocprim/include/rocprim/device/device_reduce_by_key.hpp index aabdaba8d..a47c4f44f 100644 --- a/rocprim/include/rocprim/device/device_reduce_by_key.hpp +++ b/rocprim/include/rocprim/device/device_reduce_by_key.hpp @@ -504,6 +504,9 @@ inline hipError_t reduce_by_key(void* temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + return detail::reduce_by_key_impl(temporary_storage, storage_size, @@ -550,6 +553,9 @@ inline hipError_t deterministic_reduce_by_key(void* temporar hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + return detail::reduce_by_key_impl( temporary_storage, storage_size, diff --git a/rocprim/include/rocprim/device/device_run_length_encode.hpp b/rocprim/include/rocprim/device/device_run_length_encode.hpp index 88e05138e..79a913659 100644 --- a/rocprim/include/rocprim/device/device_run_length_encode.hpp +++ b/rocprim/include/rocprim/device/device_run_length_encode.hpp @@ -356,6 +356,9 @@ inline hipError_t run_length_encode(void* temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + using input_type = typename std::iterator_traits::value_type; using count_type = unsigned int; diff --git a/rocprim/include/rocprim/device/device_scan.hpp b/rocprim/include/rocprim/device/device_scan.hpp index 67dcac0a2..04affc3b7 100644 --- a/rocprim/include/rocprim/device/device_scan.hpp +++ b/rocprim/include/rocprim/device/device_scan.hpp @@ -505,6 +505,9 @@ inline hipError_t inclusive_scan(void* temporary_storage, const hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + // AccType may be const or a reference. Get the non-const, non-reference type. // This is necessary because we may need to assign to instances of this type or create pointers to it. using safe_acc_type = typename std::remove_const::type>::type; @@ -550,6 +553,9 @@ inline hipError_t deterministic_inclusive_scan(void* temporary_stora const hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + // AccType may be const or a reference. Get the non-const, non-reference type. // This is necessary because we may need to assign to instances of this type or create pointers to it. using safe_acc_type = typename std::remove_const::type>::type; @@ -680,6 +686,9 @@ inline hipError_t exclusive_scan(void* temporary_storage, const hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + // AccType may be const or a reference. Get the non-const, non-reference type. // This is necessary because we may need to assign to instances of this type or create pointers to it. using safe_acc_type = typename std::remove_const::type>::type; @@ -726,6 +735,9 @@ inline hipError_t deterministic_exclusive_scan(void* temporary_sto const hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + // AccType may be const or a reference. Get the non-const, non-reference type. // This is necessary because we may need to assign to instances of this type or create pointers to it. using safe_acc_type = typename std::remove_const::type>::type; diff --git a/rocprim/include/rocprim/device/device_scan_by_key.hpp b/rocprim/include/rocprim/device/device_scan_by_key.hpp index 93ca4bc93..76b109bef 100644 --- a/rocprim/include/rocprim/device/device_scan_by_key.hpp +++ b/rocprim/include/rocprim/device/device_scan_by_key.hpp @@ -411,6 +411,9 @@ inline hipError_t inclusive_scan_by_key(void* const temporary_sto const hipStream_t stream = 0, const bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + using value_type = typename std::iterator_traits::value_type; return detail::scan_by_key_impl::value_type; return detail::scan_by_key_impl(temporary_storage, storage_size, input, diff --git a/rocprim/include/rocprim/device/device_search_n.hpp b/rocprim/include/rocprim/device/device_search_n.hpp index 8d6c8edbb..f12453332 100644 --- a/rocprim/include/rocprim/device/device_search_n.hpp +++ b/rocprim/include/rocprim/device/device_search_n.hpp @@ -94,6 +94,9 @@ hipError_t search_n(void* temporary_storage, const hipStream_t stream = static_cast(0), const bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + return detail::search_n_impl(temporary_storage, storage_size, input, diff --git a/rocprim/include/rocprim/device/device_segmented_radix_sort.hpp b/rocprim/include/rocprim/device/device_segmented_radix_sort.hpp index 4b385e5c5..d05477539 100644 --- a/rocprim/include/rocprim/device/device_segmented_radix_sort.hpp +++ b/rocprim/include/rocprim/device/device_segmented_radix_sort.hpp @@ -713,6 +713,9 @@ hipError_t segmented_radix_sort_keys(void * temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + empty_type * values = nullptr; bool ignored; return detail::segmented_radix_sort_impl( @@ -841,6 +844,9 @@ hipError_t segmented_radix_sort_keys_desc(void * temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + empty_type * values = nullptr; bool ignored; return detail::segmented_radix_sort_impl( @@ -989,6 +995,9 @@ hipError_t segmented_radix_sort_pairs(void * temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + bool ignored; return detail::segmented_radix_sort_impl( temporary_storage, storage_size, @@ -1133,6 +1142,9 @@ hipError_t segmented_radix_sort_pairs_desc(void * temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + bool ignored; return detail::segmented_radix_sort_impl( temporary_storage, storage_size, @@ -1261,6 +1273,9 @@ hipError_t segmented_radix_sort_keys(void * temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + empty_type * values = nullptr; bool is_result_in_output; hipError_t error = detail::segmented_radix_sort_impl( @@ -1395,6 +1410,9 @@ hipError_t segmented_radix_sort_keys_desc(void * temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + empty_type * values = nullptr; bool is_result_in_output; hipError_t error = detail::segmented_radix_sort_impl( @@ -1545,6 +1563,9 @@ hipError_t segmented_radix_sort_pairs(void * temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + bool is_result_in_output; hipError_t error = detail::segmented_radix_sort_impl( temporary_storage, storage_size, @@ -1689,6 +1710,9 @@ hipError_t segmented_radix_sort_pairs_desc(void * temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + bool is_result_in_output; hipError_t error = detail::segmented_radix_sort_impl( temporary_storage, storage_size, diff --git a/rocprim/include/rocprim/device/device_segmented_reduce.hpp b/rocprim/include/rocprim/device/device_segmented_reduce.hpp index d748cdae8..64654fde8 100644 --- a/rocprim/include/rocprim/device/device_segmented_reduce.hpp +++ b/rocprim/include/rocprim/device/device_segmented_reduce.hpp @@ -244,6 +244,9 @@ hipError_t segmented_reduce(void * temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + return detail::segmented_reduce_impl( temporary_storage, storage_size, input, output, diff --git a/rocprim/include/rocprim/device/device_segmented_scan.hpp b/rocprim/include/rocprim/device/device_segmented_scan.hpp index 74d37a769..8e1e091b6 100644 --- a/rocprim/include/rocprim/device/device_segmented_scan.hpp +++ b/rocprim/include/rocprim/device/device_segmented_scan.hpp @@ -263,6 +263,9 @@ hipError_t segmented_inclusive_scan(void * temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + using input_type = typename std::iterator_traits::value_type; using result_type = input_type; @@ -384,6 +387,9 @@ hipError_t segmented_exclusive_scan(void * temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + return detail::segmented_scan_impl( temporary_storage, storage_size, input, output, segments, begin_offsets, end_offsets, initial_value, @@ -486,6 +492,9 @@ hipError_t segmented_inclusive_scan(void * temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + using input_type = typename std::iterator_traits::value_type; using result_type = input_type; using flag_type = typename std::iterator_traits::value_type; @@ -603,6 +612,9 @@ hipError_t segmented_exclusive_scan(void * temporary_storage, hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + using result_type = InitValueType; using flag_type = typename std::iterator_traits::value_type; using headflag_scan_op_wrapper_type = diff --git a/rocprim/include/rocprim/device/device_select.hpp b/rocprim/include/rocprim/device/device_select.hpp index a7d65d1bf..f8928b567 100644 --- a/rocprim/include/rocprim/device/device_select.hpp +++ b/rocprim/include/rocprim/device/device_select.hpp @@ -138,6 +138,9 @@ hipError_t select(void * temporary_storage, const hipStream_t stream = 0, const bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + // Dummy unary predicate using unary_predicate_type = ::rocprim::empty_type; // Dummy inequality operation @@ -270,6 +273,9 @@ hipError_t select(void * temporary_storage, const hipStream_t stream = 0, const bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + // Dummy flag type using flag_type = ::rocprim::empty_type; using offset_type = unsigned int; @@ -408,6 +414,9 @@ inline hipError_t select(void* temporary_storage, const hipStream_t stream = 0, const bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + // Dummy inequality operation using inequality_op_type = ::rocprim::empty_type; using offset_type = unsigned int; @@ -529,6 +538,9 @@ hipError_t unique(void * temporary_storage, const hipStream_t stream = 0, const bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + // Dummy unary predicate using unary_predicate_type = ::rocprim::empty_type; using offset_type = unsigned int; @@ -628,6 +640,9 @@ inline hipError_t unique_by_key(void* temporary_storag const hipStream_t stream = 0, const bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + using offset_type = unsigned int; // Dummy flag ::rocprim::empty_type* const no_flags = nullptr; diff --git a/rocprim/include/rocprim/device/device_transform.hpp b/rocprim/include/rocprim/device/device_transform.hpp index 4028dfc13..3e5b7c29a 100644 --- a/rocprim/include/rocprim/device/device_transform.hpp +++ b/rocprim/include/rocprim/device/device_transform.hpp @@ -126,6 +126,9 @@ inline hipError_t transform(InputIterator input, if( size == size_t(0) ) return hipSuccess; + // Clear any existing error + (void) hipGetLastError(); + using input_type = typename std::iterator_traits::value_type; using result_type = typename ::rocprim::invoke_result::type; @@ -257,6 +260,9 @@ hipError_t transform(InputIterator1 input1, const hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + using value_type1 = typename std::iterator_traits::value_type; using value_type2 = typename std::iterator_traits::value_type; return transform( From a6e932e68b2262a739951003f6be93cabf1925cf Mon Sep 17 00:00:00 2001 From: Wayne Franz Date: Tue, 4 Mar 2025 15:01:21 -0500 Subject: [PATCH 3/5] Improve accuracy of HIP_CHECK, HIP_CHECK_LAUNCH macros Modify the HIP_CHECK and HIP_CHECK_LAUNCH macros so they more clearly capture returned, pre-launch and post-launch errors. These changes are based on the HIP documentation and example at https://rocm.docs.amd.com/projects/HIP/en/latest/how-to/hip_runtime_api/error_handling.html. For HIP_CHECK, clear any pre-existing error, then capture any error returned by the expression being checked, and any HIP error returned by hipGetLastError. For HIP_CHECK_LAUNCH, clear any pre-existing error, then launch the kernel. Then capture any error returned by hipGetLastError (this will capture pre-launch issues like kernel argument problems), and capture any error returned by hipStreamSynchronize (this will capture in-kernel errors). --- benchmark/benchmark_utils.hpp | 1 - common/utils.hpp | 74 +++++++++++++++++------------------ test/common_test_header.hpp | 8 ++-- 3 files changed, 41 insertions(+), 42 deletions(-) diff --git a/benchmark/benchmark_utils.hpp b/benchmark/benchmark_utils.hpp index 88551af06..97b896860 100644 --- a/benchmark/benchmark_utils.hpp +++ b/benchmark/benchmark_utils.hpp @@ -50,7 +50,6 @@ #include #include #include -#include #include #include #include diff --git a/common/utils.hpp b/common/utils.hpp index c21380a8f..06cdf4cd5 100644 --- a/common/utils.hpp +++ b/common/utils.hpp @@ -23,6 +23,8 @@ #ifndef COMMON_UTILS_HPP_ #define COMMON_UTILS_HPP_ +#include + #include // This macro is used to print error messages generated by HIP_CHECK, below. @@ -46,48 +48,44 @@ // GoogleTest-compatible HIP_CHECK macro. FAIL is called to log the Google Test trace. // The lambda is invoked immediately as assertions that generate a fatal failure can // only be used in void-returning functions. -#define HIP_CHECK(condition) \ - { \ - (void) hipGetLastError(); \ - hipError_t ret_error = condition; \ - hipError_t hip_error = hipGetLastError(); \ - \ - if (ret_error != hipSuccess || hip_error != hipSuccess) \ - { \ - std::ostringstream err_stream; \ - if (ret_error != hipSuccess) \ - { \ - err_stream << "Call returned " << ret_error << ": " \ - << hipGetErrorString(ret_error) << std::endl; \ - } \ - if (hip_error != hipSuccess && hip_error != ret_error) \ - { \ - err_stream << "HIP error " << hip_error << ": " \ - << hipGetErrorString(hip_error) << std::endl; \ - } \ - err_stream << "File: " << __FILE__ << " line: " << __LINE__; \ - const std::string err_msg = err_stream.str(); \ - PRINT_ERROR(err_msg); \ - exit((ret_error != hipSuccess ? ret_error : hip_error)); \ - } \ +#define HIP_CHECK(condition) \ + { \ + (void) hipGetLastError(); \ + const hipError_t ret_error = condition; \ + const hipError_t hip_error = hipGetLastError(); \ + for (const hipError_t& error : {ret_error, hip_error}) \ + { \ + if (error != hipSuccess) \ + { \ + std::ostringstream err_stream; \ + err_stream << "HIP error: " << hipGetErrorString(error) << std::endl \ + << "File: " << __FILE__ << " line: " << __LINE__; \ + const std::string err_msg = err_stream.str(); \ + PRINT_ERROR(err_msg); \ + exit(error); \ + } \ + } \ } // GoogleTest-compatible HIP_CHECK macro that can be used for calls that don't return // a hipError (eg. kernel launches). -#define HIP_CHECK_LAUNCH(launch) \ - { \ - (void) hipGetLastError(); \ - launch; \ - hipError_t error = hipGetLastError(); \ - if (error != hipSuccess) \ - { \ - std::ostringstream err_stream; \ - err_stream << "HIP error: " << hipGetErrorString(error) << std::endl \ - << "File: " << __FILE__ << " line: " << __LINE__; }(); \ - const std::string err_msg = err_stream.str(); \ - PRINT_ERROR(err_msg); \ - exit(error); \ - } \ +#define HIP_CHECK_LAUNCH(launch) \ + { \ + (void) hipGetLastError(); \ + launch; \ + hipError_t error = hipGetLastError(); \ + for (const hipError_t& error : {hipGetLastError(), hipDeviceSynchronize()}) \ + { \ + if (error != hipSuccess) \ + { \ + std::ostringstream err_stream; \ + err_stream << "HIP error: " << hipGetErrorString(error) << std::endl \ + << "File: " << __FILE__ << " line: " << __LINE__; \ + const std::string err_msg = err_stream.str(); \ + PRINT_ERROR(err_msg); \ + exit(error); \ + } \ + } \ } namespace common diff --git a/test/common_test_header.hpp b/test/common_test_header.hpp index e46fe17d1..514970388 100755 --- a/test/common_test_header.hpp +++ b/test/common_test_header.hpp @@ -35,7 +35,6 @@ #include #include #include -#include #include "../common/utils.hpp" @@ -60,8 +59,11 @@ } \ if (error != hipSuccess) \ { \ - [error]() { FAIL() << "HIP error: " << hipGetErrorString(error) \ - << " line: " << __LINE__; }(); \ + [error]() \ + { \ + FAIL() << "HIP error: " << hipGetErrorString(error) << std::endl \ + << "File: " << __FILE__ << " line: " << __LINE__; \ + }(); \ exit(error); \ } \ } From 0fcf7fa22d484a8c39d31f89e98f26dfc4ceaf37 Mon Sep 17 00:00:00 2001 From: Wayne Franz Date: Fri, 7 Mar 2025 14:55:02 -0500 Subject: [PATCH 4/5] Add async version of HIP_CHECK_LAUNCH Some tests cannot synchronize immediately after a kernel call/ This change splits HIP_CHECK_LAUNCH into two macros: - HIP_CHECK_LAUNCH - does not call hipDeviceSynchronize (leaving detection of in-kernel errors up to the caller) - HIP_CHECK_LAUNCH_SYNC - does call hipDeviceSynchronize (catches in-kernel errors) It also adds a few hipGetLastError calls to clear the internally tracked HIP error for new algorithms that have been added. --- benchmark/benchmark_block_radix_sort.cpp | 21 ++++++----- benchmark/benchmark_config_dispatch.cpp | 3 +- common/utils.hpp | 24 +++++++++++++ .../rocprim/device/device_merge_inplace.hpp | 3 ++ .../device/device_run_length_encode.hpp | 3 ++ test/hip/test_hip_api.cpp | 2 +- test/hip/test_hip_async_copy.cpp | 36 ++++++++++--------- test/hip/test_ordered_block_id.cpp | 6 ++-- test/rocprim/detail/get_rocprim_version.cpp | 2 +- .../internal/test_internal_merge_path.cpp | 2 +- ...test_block_adjacent_difference.kernels.hpp | 14 ++++---- .../test_block_discontinuity.kernels.hpp | 6 ++-- test/rocprim/test_block_exchange.kernels.hpp | 12 +++---- test/rocprim/test_block_histogram.kernels.hpp | 2 +- test/rocprim/test_block_load_store.hpp | 6 ++-- test/rocprim/test_block_radix_rank.hpp | 2 +- .../rocprim/test_block_radix_sort.kernels.hpp | 32 ++++++++++------- test/rocprim/test_block_reduce.kernels.hpp | 6 ++-- test/rocprim/test_block_run_length_decode.cpp | 2 +- test/rocprim/test_block_scan.kernels.hpp | 14 ++++---- test/rocprim/test_block_shuffle.hpp | 8 ++--- test/rocprim/test_block_sort.hpp | 2 +- test/rocprim/test_config_dispatch.cpp | 2 +- test/rocprim/test_intrinsics.cpp | 22 ++++++------ test/rocprim/test_thread.cpp | 4 +-- test/rocprim/test_thread_algos.cpp | 12 +++---- test/rocprim/test_warp_exchange.cpp | 6 ++-- test/rocprim/test_warp_load.cpp | 4 +-- test/rocprim/test_warp_reduce.hpp | 28 +++++++-------- test/rocprim/test_warp_scan.hpp | 36 +++++++++---------- test/rocprim/test_warp_sort.hpp | 4 +-- test/rocprim/test_warp_store.cpp | 4 +-- 32 files changed, 185 insertions(+), 145 deletions(-) diff --git a/benchmark/benchmark_block_radix_sort.cpp b/benchmark/benchmark_block_radix_sort.cpp index 34912aedb..d4ef9d59d 100644 --- a/benchmark/benchmark_block_radix_sort.cpp +++ b/benchmark/benchmark_block_radix_sort.cpp @@ -166,15 +166,18 @@ void run_benchmark(benchmark::State& state, if(benchmark_kind == benchmark_kinds::sort_keys) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - sort_keys_kernel), - dim3(size / items_per_block), - dim3(BlockSize), - 0, - stream, - d_input, - d_output); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + sort_keys_kernel), + dim3(size / items_per_block), + dim3(BlockSize), + 0, + stream, + d_input, + d_output + ) + ); } else if(benchmark_kind == benchmark_kinds::sort_pairs) { diff --git a/benchmark/benchmark_config_dispatch.cpp b/benchmark/benchmark_config_dispatch.cpp index ee7a52ac0..4379a6812 100644 --- a/benchmark/benchmark_config_dispatch.cpp +++ b/benchmark/benchmark_config_dispatch.cpp @@ -63,9 +63,8 @@ static void BM_kernel_launch(benchmark::State& state) for(auto _ : state) { - HIP_CHECK_LAUNCH(hipLaunchKernelGGL(empty_kernel, dim3(1), dim3(1), 0, stream)); + HIP_CHECK_LAUNCH_SYNC(hipLaunchKernelGGL(empty_kernel, dim3(1), dim3(1), 0, stream)); } - HIP_CHECK(hipStreamSynchronize(stream)); } #define CREATE_BENCHMARK(ST, SK) \ diff --git a/common/utils.hpp b/common/utils.hpp index 06cdf4cd5..dbc20afaf 100644 --- a/common/utils.hpp +++ b/common/utils.hpp @@ -69,11 +69,35 @@ // GoogleTest-compatible HIP_CHECK macro that can be used for calls that don't return // a hipError (eg. kernel launches). +// This version does not synchronize after the launch. That means that any errors that +// occur during kernel execution will not be caught. To catch those, the caller should +// invoke hipStreamSynchronize and check its return value after the launch. #define HIP_CHECK_LAUNCH(launch) \ { \ (void) hipGetLastError(); \ launch; \ hipError_t error = hipGetLastError(); \ + { \ + if (error != hipSuccess) \ + { \ + std::ostringstream err_stream; \ + err_stream << "HIP error: " << hipGetErrorString(error) << std::endl \ + << "File: " << __FILE__ << " line: " << __LINE__; \ + const std::string err_msg = err_stream.str(); \ + PRINT_ERROR(err_msg); \ + exit(error); \ + } \ + } \ + } + +// GoogleTest-compatible HIP_CHECK macro that can be used for calls that don't return +// a hipError (eg. kernel launches). +// Note: we use hipGetLastError to retrieve pre-launch errors (eg. kernel argument issues) +// and hipDeviceSynchronize to retrieve in-kernel errors. +#define HIP_CHECK_LAUNCH_SYNC(launch) \ + { \ + (void) hipGetLastError(); \ + launch; \ for (const hipError_t& error : {hipGetLastError(), hipDeviceSynchronize()}) \ { \ if (error != hipSuccess) \ diff --git a/rocprim/include/rocprim/device/device_merge_inplace.hpp b/rocprim/include/rocprim/device/device_merge_inplace.hpp index e82ec1cf6..323b40e47 100644 --- a/rocprim/include/rocprim/device/device_merge_inplace.hpp +++ b/rocprim/include/rocprim/device/device_merge_inplace.hpp @@ -702,6 +702,9 @@ inline hipError_t merge_inplace(void* temporary_storage, const hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + using config = detail::default_or_custom_config>; constexpr size_t global_block_size = config::global_merge_block_size; diff --git a/rocprim/include/rocprim/device/device_run_length_encode.hpp b/rocprim/include/rocprim/device/device_run_length_encode.hpp index 79a913659..aa60c5459 100644 --- a/rocprim/include/rocprim/device/device_run_length_encode.hpp +++ b/rocprim/include/rocprim/device/device_run_length_encode.hpp @@ -472,6 +472,9 @@ inline hipError_t run_length_encode_non_trivial_runs(void* tem hipStream_t stream = 0, bool debug_synchronous = false) { + // Clear any existing error + (void) hipGetLastError(); + return detail::run_length_encode::run_length_encode_non_trivial_runs_impl( temporary_storage, storage_size, diff --git a/test/hip/test_hip_api.cpp b/test/hip/test_hip_api.cpp index fe676cb8b..e7a7cd505 100644 --- a/test/hip/test_hip_api.cpp +++ b/test/hip/test_hip_api.cpp @@ -57,7 +57,7 @@ TEST(HIPTests, Saxpy) common::device_ptr d_x(x); common::device_ptr d_y(y); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(saxpy_kernel), dim3((N + 255) / 256), dim3(256), diff --git a/test/hip/test_hip_async_copy.cpp b/test/hip/test_hip_async_copy.cpp index 5f89bc585..86433cb6b 100644 --- a/test/hip/test_hip_async_copy.cpp +++ b/test/hip/test_hip_async_copy.cpp @@ -203,14 +203,16 @@ TEST_F(HipAsyncCopyTests, AsyncCopyBreadthFirst) const unsigned int grid_size = (sizes[i] + block_size - 1) / block_size; if(sizes[i] > 0) { - hipLaunchKernelGGL(HIP_KERNEL_NAME(increment_kernel), - dim3(grid_size), - dim3(block_size), - 0, - streams[i], - d_inputs[i].get(), - sizes[i]); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(increment_kernel), + dim3(grid_size), + dim3(block_size), + 0, + streams[i], + d_inputs[i].get(), + sizes[i] + ) + ); } } for(size_t i = 0; i < sizes.size(); i++) @@ -253,14 +255,16 @@ TEST(HipAsyncCopyTestsExtra, StreamInStruct) common::device_ptr d_input(input); const unsigned int grid_size = (size + block_size - 1) / block_size; - hipLaunchKernelGGL(HIP_KERNEL_NAME(increment_kernel), - dim3(grid_size), - dim3(block_size), - 0, - stream_wrapper.stream, - d_input.get(), - size); - HIP_CHECK(hipGetLastError()); + HIP_CHECK_LAUNCH( + hipLaunchKernelGGL(HIP_KERNEL_NAME(increment_kernel), + dim3(grid_size), + dim3(block_size), + 0, + stream_wrapper.stream, + d_input.get(), + size + ) + ); vector_type output(size); HIP_CHECK(hipMemcpyAsync(output.data(), diff --git a/test/hip/test_ordered_block_id.cpp b/test/hip/test_ordered_block_id.cpp index 581677b20..f3e967b8c 100644 --- a/test/hip/test_ordered_block_id.cpp +++ b/test/hip/test_ordered_block_id.cpp @@ -53,19 +53,17 @@ bool test_func(int block_count, int thread_count) { common::device_ptr d_flags(block_count); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( test_kernel, dim3(block_count), dim3(thread_count), 0, - hipStreamDefault, + 0, d_flags.get() ) ); - HIP_CHECK(hipDeviceSynchronize()); - auto h_vec = d_flags.load(); for(const auto i : h_vec) { diff --git a/test/rocprim/detail/get_rocprim_version.cpp b/test/rocprim/detail/get_rocprim_version.cpp index d74aba5ab..5d7804de5 100644 --- a/test/rocprim/detail/get_rocprim_version.cpp +++ b/test/rocprim/detail/get_rocprim_version.cpp @@ -37,7 +37,7 @@ unsigned int get_rocprim_version_on_device() { common::device_ptr d_version(1); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( get_version_kernel, dim3(1), diff --git a/test/rocprim/internal/test_internal_merge_path.cpp b/test/rocprim/internal/test_internal_merge_path.cpp index 972b5e04d..1254c3dfc 100644 --- a/test/rocprim/internal/test_internal_merge_path.cpp +++ b/test/rocprim/internal/test_internal_merge_path.cpp @@ -29,7 +29,7 @@ void serial_merge(std::vector& input, common::device_ptr device_data(input); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(merge_kernel), dim3(1), diff --git a/test/rocprim/test_block_adjacent_difference.kernels.hpp b/test/rocprim/test_block_adjacent_difference.kernels.hpp index 97f707458..c7bd890d5 100644 --- a/test/rocprim/test_block_adjacent_difference.kernels.hpp +++ b/test/rocprim/test_block_adjacent_difference.kernels.hpp @@ -439,7 +439,7 @@ auto test_block_adjacent_difference() -> typename std::enable_if::t common::device_ptr device_heads(size); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME( flag_heads_kernel), @@ -542,7 +542,7 @@ auto test_block_adjacent_difference() -> typename std::enable_if::t common::device_ptr device_tails(size); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME( flag_tails_kernel), @@ -661,7 +661,7 @@ auto test_block_adjacent_difference() -> typename std::enable_if::t common::device_ptr device_tails(size); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(flag_heads_and_tails_kernel typename std::enable_if::t common::device_ptr d_output(size); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(subtract_left_kernel typename std::enable_if::t common::device_ptr d_output(size); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(subtract_right_kernel typename std::enable_if::t common::device_ptr d_output(size); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(subtract_left_partial_kernel typename std::enable_if::t common::device_ptr d_output(size); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(subtract_right_partial_kernel device_heads(size); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME( flag_heads_kernel), @@ -350,7 +350,7 @@ auto test_block_discontinuity() common::device_ptr device_tails(size); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME( flag_tails_kernel), @@ -449,7 +449,7 @@ auto test_block_discontinuity() common::device_ptr device_tails(size); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(flag_heads_and_tails_kernel typename std::enable_if), @@ -322,7 +322,7 @@ auto test_block_exchange(int /*device_id*/) -> typename std::enable_if), @@ -408,7 +408,7 @@ auto test_block_exchange(int device_id) -> typename std::enable_if: // Running kernel constexpr unsigned int grid_size = (size / items_per_block); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME( blocked_to_warp_striped_kernel), @@ -494,7 +494,7 @@ auto test_block_exchange(int device_id) -> typename std::enable_if: // Running kernel constexpr unsigned int grid_size = (size / items_per_block); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME( warp_striped_to_blocked_kernel), @@ -576,7 +576,7 @@ auto test_block_exchange(int /*device_id*/) -> typename std::enable_if), @@ -661,7 +661,7 @@ auto test_block_exchange(int /*device_id*/) -> typename std::enable_if), diff --git a/test/rocprim/test_block_histogram.kernels.hpp b/test/rocprim/test_block_histogram.kernels.hpp index 460c9dec9..124f94efb 100644 --- a/test/rocprim/test_block_histogram.kernels.hpp +++ b/test/rocprim/test_block_histogram.kernels.hpp @@ -168,7 +168,7 @@ void test_block_histogram_input_arrays() common::device_ptr device_output_bin(output_bin); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME( histogram_kernel), diff --git a/test/rocprim/test_block_load_store.hpp b/test/rocprim/test_block_load_store.hpp index b48250481..c5b851729 100644 --- a/test/rocprim/test_block_load_store.hpp +++ b/test/rocprim/test_block_load_store.hpp @@ -85,7 +85,7 @@ typed_test_def(suite_name, name_suffix, LoadStoreClass) common::device_ptr device_output(size); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME( load_store_kernel), @@ -171,7 +171,7 @@ typed_test_def(suite_name, name_suffix, LoadStoreClassValid) common::device_ptr device_output(output); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(load_store_valid_kernel device_output(size); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(load_store_valid_default_kernel d_ranks_output(size); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME( rank_kernel), diff --git a/test/rocprim/test_block_radix_sort.kernels.hpp b/test/rocprim/test_block_radix_sort.kernels.hpp index 2250a977c..8064b069e 100644 --- a/test/rocprim/test_block_radix_sort.kernels.hpp +++ b/test/rocprim/test_block_radix_sort.kernels.hpp @@ -243,7 +243,7 @@ auto test_block_radix_sort() -> typename std::enable_if::type // Preparing device common::device_ptr device_keys_output(keys_output, size); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(sort_key_kernel), dim3(grid_size), @@ -356,19 +356,25 @@ auto test_block_radix_sort() -> typename std::enable_if::type common::device_ptr device_values_output(values_output); // Running kernel - HIP_CHECK_LAUNCH((sort_key_value_kernel - <<>>(device_keys_output.get(), - device_values_output.get(), - to_striped, - descending, - start_bit, - end_bit + HIP_CHECK_LAUNCH_SYNC( + hipLaunchKernelGGL( + HIP_KERNEL_NAME(sort_key_value_kernel), + dim3(grid_size), + dim3(block_size), + 0, + 0, + device_keys_output.get(), + device_values_output.get(), + to_striped, + descending, + start_bit, + end_bit ) - )); + ); // Getting results to host keys_output = device_keys_output.load_to_unique_ptr(); diff --git a/test/rocprim/test_block_reduce.kernels.hpp b/test/rocprim/test_block_reduce.kernels.hpp index 2b641310f..9a4d71696 100644 --- a/test/rocprim/test_block_reduce.kernels.hpp +++ b/test/rocprim/test_block_reduce.kernels.hpp @@ -102,7 +102,7 @@ struct static_run_algo ); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(reduce_kernel), dim3(grid_size), dim3(BlockSize), 0, 0, @@ -176,7 +176,7 @@ struct static_run_valid ); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(reduce_valid_kernel), dim3(grid_size), dim3(BlockSize), 0, 0, @@ -288,7 +288,7 @@ void test_block_reduce_input_arrays() common::device_ptr device_output_reductions(output_reductions); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME( reduce_array_kernel), diff --git a/test/rocprim/test_block_run_length_decode.cpp b/test/rocprim/test_block_run_length_decode.cpp index 58bcedd58..c293a10e5 100644 --- a/test/rocprim/test_block_run_length_decode.cpp +++ b/test/rocprim/test_block_run_length_decode.cpp @@ -233,7 +233,7 @@ TYPED_TEST(HipcubBlockRunLengthDecodeTest, TestDecode) common::device_ptr d_decoded_runs(expected.size()); common::device_ptr d_decoded_offsets(expected.size()); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(block_run_length_decode_kernel), dim3(grid_size), dim3(BlockSize), 0, 0, @@ -567,7 +567,7 @@ auto test_block_scan_input_arrays() common::device_ptr device_output(output); // Launching kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(inclusive_scan_array_kernel device_output_reductions(output_reductions); // Launching kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(inclusive_scan_reduce_array_kernel device_output_bp(output_block_prefixes); // Launching kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(inclusive_scan_array_prefix_callback_kernel device_output(output); // Launching kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(exclusive_scan_array_kernel device_output_reductions(output_reductions.size()); // Launching kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(exclusive_scan_reduce_array_kernel device_output_bp(output_block_prefixes.size()); // Launching kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(exclusive_scan_prefix_callback_array_kernel device_output(input_data.size()); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(shuffle_offset_kernel), dim3(grid_size), dim3(block_size), @@ -117,7 +117,7 @@ typed_test_def(suite_name, name_suffix, BlockRotate) common::device_ptr device_output(input_data.size()); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(shuffle_rotate_kernel), dim3(grid_size), dim3(block_size), @@ -177,7 +177,7 @@ typed_test_def(suite_name, name_suffix, BlockUp) common::device_ptr device_output(input_data.size()); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(shuffle_up_kernel), dim3(grid_size), dim3(block_size), @@ -241,7 +241,7 @@ typed_test_def(suite_name, name_suffix, BlockDown) common::device_ptr device_output(input_data.size()); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(shuffle_down_kernel), dim3(grid_size), dim3(block_size), diff --git a/test/rocprim/test_block_sort.hpp b/test/rocprim/test_block_sort.hpp index f40655e00..04870d16e 100644 --- a/test/rocprim/test_block_sort.hpp +++ b/test/rocprim/test_block_sort.hpp @@ -114,7 +114,7 @@ void TestSortKeyValue() // Running kernel, ignored if invalid size if(size > 0) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(sort_pairs_kernel device_arch_ptr(1); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( write_target_arch, dim3(1), diff --git a/test/rocprim/test_intrinsics.cpp b/test/rocprim/test_intrinsics.cpp index 420786739..13e1a364a 100644 --- a/test/rocprim/test_intrinsics.cpp +++ b/test/rocprim/test_intrinsics.cpp @@ -367,7 +367,7 @@ void test_shuffle() d_data.store(input); // Launching kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(shuffle_kernel), dim3(1), dim3(hardware_warp_size), @@ -468,7 +468,7 @@ TYPED_TEST(RocprimIntrinsicsTests, ShuffleIndex) device_src_lanes.store(src_lanes); // Launching kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(shuffle_index_kernel), dim3(1), dim3(hardware_warp_size), @@ -515,7 +515,7 @@ TEST(RocprimIntrinsicsTests, LaneId) common::device_ptr d_output(size); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( lane_id_kernel, dim3(blocks), @@ -523,8 +523,8 @@ TEST(RocprimIntrinsicsTests, LaneId) 0, hipStreamDefault, d_output.get() - ) - ); + ) + ); const auto h_output = d_output.load(); @@ -627,7 +627,7 @@ TEST(RocprimIntrinsicsTests, MaskedBitCount) } } - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(masked_bit_count_kernel, dim3(blocks), dim3(block_size), @@ -746,7 +746,7 @@ void warp_any_all_test() d_input.store(input); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(warp_any_all_kernel), dim3(blocks), dim3(block_size), @@ -887,7 +887,7 @@ TYPED_TEST(RocprimIntrinsicsTests, WarpPermute) d_input.store(input); d_indices.store(indices); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(warp_permute_kernel), dim3(blocks), dim3(block_size), @@ -1002,7 +1002,7 @@ TEST(RocprimIntrinsicsTests, MatchAny) d_input.store(input); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(match_any_kernel), dim3(blocks), dim3(block_size), @@ -1093,7 +1093,7 @@ TEST(RocprimIntrinsicsTests, Ballot) d_input.store(input); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(ballot_kernel, dim3(blocks), dim3(block_size), @@ -1194,7 +1194,7 @@ TEST(RocprimIntrinsicsTests, GroupElect) d_input.store(input); d_output.store(output); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(group_elect_kernel), dim3(blocks), dim3(block_size), diff --git a/test/rocprim/test_thread.cpp b/test/rocprim/test_thread.cpp index a72aa7040..7f45620c9 100644 --- a/test/rocprim/test_thread.cpp +++ b/test/rocprim/test_thread.cpp @@ -115,7 +115,7 @@ TYPED_TEST(RocprimThreadTests, FlatBlockThreadID) common::device_ptr device_output(block_size); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(flat_id_kernel), dim3(1), @@ -183,7 +183,7 @@ TYPED_TEST(RocprimThreadTests, FlatBlockID) // Preparing device common::device_ptr device_output(block_size); // Running kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(block_id_kernel), dim3(block_size_x, block_size_y, block_size_z), diff --git a/test/rocprim/test_thread_algos.cpp b/test/rocprim/test_thread_algos.cpp index 3a67843c5..621d8f2ab 100644 --- a/test/rocprim/test_thread_algos.cpp +++ b/test/rocprim/test_thread_algos.cpp @@ -144,7 +144,7 @@ TYPED_TEST(RocprimThreadOperationTests, Load) common::device_ptr device_input(input); common::device_ptr device_output(input.size()); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(thread_load_kernel), grid_size, @@ -276,7 +276,7 @@ TYPED_TEST(RocprimThreadOperationTests, StoreNontemporal) common::device_ptr device_input(input); common::device_ptr device_output(input.size()); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(thread_store_kernel), grid_size, @@ -353,7 +353,7 @@ TYPED_TEST(RocprimThreadOperationTests, Reduction) common::device_ptr device_input(input); common::device_ptr device_output(input.size()); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(thread_reduce_kernel), grid_size, @@ -427,7 +427,7 @@ TYPED_TEST(RocprimThreadOperationTests, Scan) common::device_ptr device_input(input); common::device_ptr device_output(input.size()); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(thread_scan_kernel), grid_size, @@ -541,7 +541,7 @@ void merge_path_search_test() common::device_ptr device_output_oob_x(1); common::device_ptr device_output_oob_y(1); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(thread_search_kernel), grid_size, @@ -558,7 +558,7 @@ void merge_path_search_test() ) ); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(thread_search_out_of_bounds_kernel), grid_size, diff --git a/test/rocprim/test_warp_exchange.cpp b/test/rocprim/test_warp_exchange.cpp index 1600193a6..92836f5dc 100644 --- a/test/rocprim/test_warp_exchange.cpp +++ b/test/rocprim/test_warp_exchange.cpp @@ -254,7 +254,7 @@ TYPED_TEST(WarpExchangeTest, WarpExchange) common::device_ptr d_output(items_count); HIP_CHECK(hipMemset(d_output.get(), 0, items_count * sizeof(T))); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_exchange_kernel), dim3(1), @@ -307,7 +307,7 @@ TYPED_TEST(WarpExchangeTest, WarpExchangeNotInplace) common::device_ptr d_output(items_count); HIP_CHECK(hipMemset(d_output.get(), 0, items_count * sizeof(T))); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_exchange_kernel), dim3(1), @@ -423,7 +423,7 @@ TYPED_TEST(WarpExchangeScatterTest, WarpExchangeScatter) common::device_ptr d_output(items_count); HIP_CHECK(hipMemset(d_output.get(), 0, items_count * sizeof(T))); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_exchange_scatter_kernel), dim3(1), diff --git a/test/rocprim/test_warp_load.cpp b/test/rocprim/test_warp_load.cpp index e1e05cef8..dea0a96f2 100644 --- a/test/rocprim/test_warp_load.cpp +++ b/test/rocprim/test_warp_load.cpp @@ -264,7 +264,7 @@ TYPED_TEST(WarpLoadTest, WarpLoad) common::device_ptr d_input(input); common::device_ptr d_output(items_count); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_load_kernel), dim3(1), @@ -308,7 +308,7 @@ TYPED_TEST(WarpLoadTest, WarpLoadGuarded) common::device_ptr d_input(input); common::device_ptr d_output(items_count); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_load_guarded_kernel), dim3(1), diff --git a/test/rocprim/test_warp_reduce.hpp b/test/rocprim/test_warp_reduce.hpp index f1248903e..84fc79e9d 100644 --- a/test/rocprim/test_warp_reduce.hpp +++ b/test/rocprim/test_warp_reduce.hpp @@ -116,7 +116,7 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, ReduceSum) // Launching kernel if (current_device_warp_size == ws32) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_reduce_sum_kernel), dim3(size / block_size_ws32), @@ -130,7 +130,7 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, ReduceSum) } else if (current_device_warp_size == ws64) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_reduce_sum_kernel), dim3(size / block_size_ws64), @@ -231,7 +231,7 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, AllReduceSum) // Launching kernel if (current_device_warp_size == ws32) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_allreduce_sum_kernel), dim3(size / block_size_ws32), @@ -245,7 +245,7 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, AllReduceSum) } else if (current_device_warp_size == ws64) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_allreduce_sum_kernel), dim3(size / block_size_ws64), @@ -343,7 +343,7 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, ReduceSumValid) // Launching kernel if (current_device_warp_size == ws32) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_reduce_sum_kernel), dim3(size / block_size_ws32), @@ -358,7 +358,7 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, ReduceSumValid) } else if (current_device_warp_size == ws64) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_reduce_sum_kernel), dim3(size / block_size_ws64), @@ -462,7 +462,7 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, AllReduceSumValid) // Launching kernel if (current_device_warp_size == ws32) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_allreduce_sum_kernel), dim3(size / block_size_ws32), @@ -477,7 +477,7 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, AllReduceSumValid) } else if (current_device_warp_size == ws64) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_allreduce_sum_kernel), dim3(size / block_size_ws64), @@ -586,7 +586,7 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, ReduceCustomStruct) // Launching kernel if (current_device_warp_size == ws32) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_reduce_sum_kernel), dim3(size / block_size_ws32), @@ -600,7 +600,7 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, ReduceCustomStruct) } else if (current_device_warp_size == ws64) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_reduce_sum_kernel), dim3(size / block_size_ws64), @@ -713,7 +713,7 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, HeadSegmentedReduceSum) // Launching kernel if (current_device_warp_size == ws32) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(head_segmented_warp_reduce_kernel), dim3(grid_size), @@ -131,7 +131,7 @@ typed_test_def(RocprimWarpScanTests, name_suffix, InclusiveScan) } else if (current_device_warp_size == ws64) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_inclusive_scan_kernel), dim3(grid_size), @@ -356,7 +356,7 @@ typed_test_def(RocprimWarpScanTests, name_suffix, InclusiveScanReduce) // Launching kernel if (current_device_warp_size == ws32) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME( warp_inclusive_scan_reduce_kernel), @@ -372,7 +372,7 @@ typed_test_def(RocprimWarpScanTests, name_suffix, InclusiveScanReduce) } else if(current_device_warp_size == ws64) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME( warp_inclusive_scan_reduce_kernel), @@ -611,7 +611,7 @@ typed_test_def(RocprimWarpScanTests, name_suffix, ExclusiveScan) // Launching kernel if (current_device_warp_size == ws32) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_exclusive_scan_kernel), dim3(grid_size), @@ -626,7 +626,7 @@ typed_test_def(RocprimWarpScanTests, name_suffix, ExclusiveScan) } else if (current_device_warp_size == ws64) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_exclusive_scan_kernel), dim3(grid_size), @@ -847,7 +847,7 @@ typed_test_def(RocprimWarpScanTests, name_suffix, ExclusiveScanWoInit) // Launching kernel if(current_device_warp_size == ws32) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME( warp_exclusive_scan_wo_init_kernel), @@ -862,7 +862,7 @@ typed_test_def(RocprimWarpScanTests, name_suffix, ExclusiveScanWoInit) } else if(current_device_warp_size == ws64) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME( warp_exclusive_scan_wo_init_kernel), @@ -983,7 +983,7 @@ typed_test_def(RocprimWarpScanTests, name_suffix, ExclusiveReduceScan) // Launching kernel if (current_device_warp_size == ws32) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME( warp_exclusive_scan_reduce_kernel), @@ -1000,7 +1000,7 @@ typed_test_def(RocprimWarpScanTests, name_suffix, ExclusiveReduceScan) } else if (current_device_warp_size == ws64) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME( warp_exclusive_scan_reduce_kernel), @@ -1127,7 +1127,7 @@ typed_test_def(RocprimWarpScanTests, name_suffix, ExclusiveReduceScanWoInit) // Launching kernel if(current_device_warp_size == ws32) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_exclusive_scan_reduce_wo_init_kernel), dim3(grid_size), @@ -1283,7 +1283,7 @@ typed_test_def(RocprimWarpScanTests, name_suffix, Scan) } else if (current_device_warp_size == ws64) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_scan_kernel), dim3(grid_size), @@ -1405,7 +1405,7 @@ typed_test_def(RocprimWarpScanTests, name_suffix, ScanReduce) // Launching kernel if (current_device_warp_size == ws32) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_scan_reduce_kernel), dim3(grid_size), @@ -1422,7 +1422,7 @@ typed_test_def(RocprimWarpScanTests, name_suffix, ScanReduce) } else if (current_device_warp_size == ws64) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_scan_reduce_kernel), dim3(grid_size), @@ -1546,7 +1546,7 @@ typed_test_def(RocprimWarpScanTests, name_suffix, InclusiveScanCustomType) // Launching kernel if (current_device_warp_size == ws32) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_inclusive_scan_kernel), dim3(grid_size), @@ -1560,7 +1560,7 @@ typed_test_def(RocprimWarpScanTests, name_suffix, InclusiveScanCustomType) } else if (current_device_warp_size == ws64) { - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_inclusive_scan_kernel), dim3(grid_size), diff --git a/test/rocprim/test_warp_sort.hpp b/test/rocprim/test_warp_sort.hpp index 864e2bab7..17a99bc59 100644 --- a/test/rocprim/test_warp_sort.hpp +++ b/test/rocprim/test_warp_sort.hpp @@ -100,7 +100,7 @@ typed_test_def(RocprimWarpSortShuffleBasedTests, name_suffix, Sort) common::device_ptr d_output(output); // Launching kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(test_hip_warp_sort), dim3(grid_size), @@ -190,7 +190,7 @@ typed_test_def(RocprimWarpSortShuffleBasedTests, name_suffix, SortKeyInt) common::device_ptr d_output_value(output_value); // Launching kernel - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL(HIP_KERNEL_NAME(test_hip_sort_key_value_kernel d_input(input); common::device_ptr d_output(items_count); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_store_kernel), dim3(1), @@ -294,7 +294,7 @@ TYPED_TEST(WarpStoreTest, WarpStoreGuarded) common::device_ptr d_output(items_count); HIP_CHECK(hipMemset(d_output.get(), 0, items_count * sizeof(T))); - HIP_CHECK_LAUNCH( + HIP_CHECK_LAUNCH_SYNC( hipLaunchKernelGGL( HIP_KERNEL_NAME(warp_store_guarded_kernel), dim3(1), From 94719e3678e6e1992c9d73d2d0ed4d0272eaeef7 Mon Sep 17 00:00:00 2001 From: Wayne Franz Date: Thu, 15 May 2025 21:00:40 -0400 Subject: [PATCH 5/5] Remove duplicate lines Remove duplicate calls to clear hipGetLastError. --- rocprim/include/rocprim/device/device_binary_search.hpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/rocprim/include/rocprim/device/device_binary_search.hpp b/rocprim/include/rocprim/device/device_binary_search.hpp index dfe148b04..9dc043976 100644 --- a/rocprim/include/rocprim/device/device_binary_search.hpp +++ b/rocprim/include/rocprim/device/device_binary_search.hpp @@ -355,9 +355,6 @@ hipError_t upper_bound(void * temporary_storage, // Clear any existing error (void) hipGetLastError(); - // Clear any existing error - (void) hipGetLastError(); - static_assert(detail::is_default_or_has_tag::value, "Config must be a specialization of struct template upper_bound_config"); using value_type = typename std::iterator_traits::value_type;