Skip to content

Commit 1d13965

Browse files
Introduced oneprof tool, minor fixes
1 parent cc75620 commit 1d13965

File tree

25 files changed

+2308
-89
lines changed

25 files changed

+2308
-89
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ You may obtain a copy of the License at https://opensource.org/licenses/MIT
4747

4848
## Profiling & Debug Tools
4949
- [onetrace](tools/onetrace) - host and device tracing tool for OpenCL(TM) and Level Zero backends with support of DPC++ (both for CPU and GPU) and OpenMP* GPU offload;
50+
- [oneprof](tools/oneprof) - GPU HW metrics collection tool for OpenCL(TM) and Level Zero backends with support of DPC++ and OpenMP* GPU offload;
5051
- [ze_tracer](tools/ze_tracer) - "Swiss army knife" for Level Zero API call tracing and profiling (former ze_intercept);
5152
- [cl_tracer](tools/cl_tracer) - "Swiss army knife" for OpenCL(TM) API call tracing and profiling;
5253
- [gpuinfo](tools/gpuinfo) - provides basic information about the GPUs installed in a system, and the list of HW metrics one can collect for it;

VERSION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.34.1
1+
0.35.0

chapters/device_activity_tracing/LevelZero.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -112,4 +112,5 @@ Event pool profiling does not require any additional environment variables to be
112112
113113
## Tools
114114
- [Level Zero Tracer](../../tools/ze_tracer)
115-
- [Tracing and Profiling Tool for Data Parallel C++ (DPC++)](../../tools/onetrace)
115+
- [Tracing and Profiling Tool for Data Parallel C++ (DPC++)](../../tools/onetrace)
116+
- [GPU Metrics Collection Tool for Data Parallel C++ (DPC++)](../../tools/oneprof)

chapters/metrics_collection/LevelZero.md

+4-1
Original file line numberDiff line numberDiff line change
@@ -304,4 +304,7 @@ ZET_ENABLE_METRICS=1 ./<application>
304304
## Samples
305305
- [Level Zero Metric Info](../../samples/ze_metric_info)
306306
- [Level Zero Metric Query](../../samples/ze_metric_query)
307-
- [Level Zero Metric Streamer](../../samples/ze_metric_streamer)
307+
- [Level Zero Metric Streamer](../../samples/ze_metric_streamer)
308+
309+
## Tools
310+
- [GPU Metrics Collection Tool for Data Parallel C++ (DPC++)](../../tools/oneprof)

chapters/runtime_api_tracing/LevelZero.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -101,4 +101,5 @@ ZE_ENABLE_TRACING_LAYER=1 ./<application>
101101
102102
## Tools
103103
- [Level Zero Tracer](../../tools/ze_tracer)
104-
- [Tracing and Profiling Tool for Data Parallel C++ (DPC++)](../../tools/onetrace)
104+
- [Tracing and Profiling Tool for Data Parallel C++ (DPC++)](../../tools/onetrace)
105+
- [GPU Metrics Collection Tool for Data Parallel C++ (DPC++)](../../tools/oneprof)

loader/loader.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,8 @@ int main(int argc, char* argv[]) {
110110
return 0;
111111
}
112112

113+
set_tool_env();
114+
113115
int app_index = parse_args(argc, argv);
114116
if (app_index <= 0 || app_index >= argc) {
115117
if (app_index >= argc) {
@@ -129,8 +131,6 @@ int main(int argc, char* argv[]) {
129131
}
130132
app_args.push_back(nullptr);
131133

132-
set_tool_env();
133-
134134
#if defined(_WIN32)
135135

136136
BOOL ok = FALSE;

samples/ze_gemm/main.cc

+5-3
Original file line numberDiff line numberDiff line change
@@ -178,9 +178,8 @@ static float RunAndCheck(ze_kernel_handle_t kernel,
178178
PTI_ASSERT(status == ZE_RESULT_SUCCESS);
179179

180180
double time = static_cast<double>(
181-
(timestamp.global.kernelEnd - timestamp.global.kernelStart) *
182-
props.timerResolution);
183-
time /= NSEC_IN_SEC;
181+
timestamp.global.kernelEnd - timestamp.global.kernelStart) /
182+
props.timerResolution;
184183
std::cout << "Matrix multiplication time: " << time <<
185184
" sec" << std::endl;
186185

@@ -238,6 +237,9 @@ static void Compute(ze_device_handle_t device,
238237
}
239238

240239
int main(int argc, char* argv[]) {
240+
utils::SetEnv("NEOReadDebugKeys", "1");
241+
utils::SetEnv("UseCyclesPerSecondTimer", "1");
242+
241243
ze_result_t status = ZE_RESULT_SUCCESS;
242244
status = zeInit(ZE_INIT_FLAG_GPU_ONLY);
243245
PTI_ASSERT(status == ZE_RESULT_SUCCESS);

samples/ze_metric_info/main.cc

+2-44
Original file line numberDiff line numberDiff line change
@@ -10,48 +10,6 @@
1010
#include "utils.h"
1111
#include "ze_utils.h"
1212

13-
static std::string GetResultType(zet_value_type_t type) {
14-
switch (type) {
15-
case ZET_VALUE_TYPE_UINT32:
16-
return "UINT32";
17-
case ZET_VALUE_TYPE_UINT64:
18-
return "UINT64";
19-
case ZET_VALUE_TYPE_FLOAT32:
20-
return "FLOAT32";
21-
case ZET_VALUE_TYPE_FLOAT64:
22-
return "FLOAT64";
23-
case ZET_VALUE_TYPE_BOOL8:
24-
return "BOOL8";
25-
default:
26-
break;
27-
}
28-
return "UNKNOWN";
29-
}
30-
31-
static std::string GetMetricType(zet_metric_type_t type) {
32-
switch (type) {
33-
case ZET_METRIC_TYPE_DURATION:
34-
return "DURATION";
35-
case ZET_METRIC_TYPE_EVENT:
36-
return "EVENT";
37-
case ZET_METRIC_TYPE_EVENT_WITH_RANGE:
38-
return "EVENT_WITH_RANGE";
39-
case ZET_METRIC_TYPE_THROUGHPUT:
40-
return "THROUGHPUT";
41-
case ZET_METRIC_TYPE_TIMESTAMP:
42-
return "TIMESTAMP";
43-
case ZET_METRIC_TYPE_FLAG:
44-
return "FLAG";
45-
case ZET_METRIC_TYPE_RATIO:
46-
return "RATIO";
47-
case ZET_METRIC_TYPE_RAW:
48-
return "RAW";
49-
default:
50-
break;
51-
}
52-
return "UNKNOWN";
53-
}
54-
5513
static void PrintMetricsInfo(ze_device_handle_t device) {
5614
PTI_ASSERT(device != nullptr);
5715
ze_result_t status = ZE_RESULT_SUCCESS;
@@ -109,8 +67,8 @@ static void PrintMetricsInfo(ze_device_handle_t device) {
10967
std::cout << "\tMetric " << mid << ": D" << group_props.domain <<
11068
" / " << sampling_type << " / " << group_props.name << " / " <<
11169
metric_props.name << " (" << metric_props.description << ") [" <<
112-
GetResultType(metric_props.resultType) << ", " <<
113-
GetMetricType(metric_props.metricType) << "]" << std::endl;
70+
utils::ze::GetResultType(metric_props.resultType) << ", " <<
71+
utils::ze::GetMetricType(metric_props.metricType) << "]" << std::endl;
11472
}
11573
}
11674
}

samples/ze_metric_streamer/CMakeLists.txt

+3
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ endif()
2727
FindL0Library(zet_metric_streamer)
2828
FindL0Headers(zet_metric_streamer)
2929

30+
# The metrics libraries are checked for this sample's own target
# (zet_metric_streamer, the same target passed to FindL0Library/FindL0Headers
# above), not for the oneprof tool target, which is not defined in this project.
CheckForMDLibrary(zet_metric_streamer)
31+
CheckForMetricsLibrary(zet_metric_streamer)
32+
3033
# Loader
3134

3235
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=zet_metric_streamer")

samples/ze_metric_streamer/ze_metric_collector.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,8 @@ class ZeMetricCollector {
220220
PTI_ASSERT(status == ZE_RESULT_SUCCESS);
221221

222222
ze_event_pool_desc_t event_pool_desc = {
223-
ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr, 0, 1};
223+
ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr,
224+
ZE_EVENT_POOL_FLAG_HOST_VISIBLE, 1};
224225
ze_event_pool_handle_t event_pool = nullptr;
225226
status = zeEventPoolCreate(collector->context_, &event_pool_desc,
226227
0, nullptr, &event_pool);

tests/run.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,9 @@
5252
"--chrome-device-timeline",
5353
"--chrome-kernel-timeline",
5454
"--chrome-device-stages",
55-
"dpc", "omp"]]
55+
"dpc", "omp"],
56+
["oneprof",
57+
"-i", "-m", "-k", "-a", "cl", "ze", "omp"]]
5658

5759
def remove_python_cache(path):
5860
files = os.listdir(path)

tests/tools/oneprof.py

+100
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
import os
2+
import subprocess
3+
import sys
4+
5+
from samples import cl_gemm
6+
from samples import dpc_gemm
7+
from samples import omp_gemm
8+
from samples import ze_gemm
9+
import utils
10+
11+
def config(path):
  """Run CMake in the build directory `path`.

  Returns the captured stderr text when it contains "CMake Error",
  otherwise None.
  """
  p = subprocess.Popen(
    ["cmake", "-DCMAKE_BUILD_TYPE=" + utils.get_build_flag(), ".."],
    cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE)
  # Deliberately no p.wait() before collecting the output: waiting on a child
  # whose stdout/stderr are pipes can deadlock once the pipe buffer fills.
  # NOTE(review): this relies on utils.run_process() draining the pipes and
  # waiting for the child, which run() in this file already depends on.
  stdout, stderr = utils.run_process(p)
  if stderr and stderr.find("CMake Error") != -1:
    return stderr
  return None
20+
21+
def build(path):
  """Run `make` in the build directory `path`.

  Returns the captured stderr text when it mentions "error" (case-insensitive),
  otherwise None.
  """
  p = subprocess.Popen(["make"], cwd = path,
    stdout = subprocess.PIPE, stderr = subprocess.PIPE)
  # Deliberately no p.wait() before collecting the output: a build can easily
  # print more than the pipe buffer holds, and waiting on the child before
  # reading its pipes would then block forever.
  # NOTE(review): this relies on utils.run_process() draining the pipes and
  # waiting for the child, which run() in this file already depends on.
  stdout, stderr = utils.run_process(p)
  if stderr and stderr.lower().find("error") != -1:
    return stderr
  return None
29+
30+
def run(path, option):
  """Launch oneprof from `path` on the sample selected by `option`.

  "cl", "ze" and "omp" profile the matching *_gemm sample with "-k -a";
  any other value is passed to oneprof as its option and dpc_gemm is profiled.

  Returns None on success, otherwise a string describing the failure
  (an "... is empty" message or the offending stdout/stderr text).
  """
  # option -> (sample name, oneprof arguments, application arguments)
  launch_table = {
    "cl": ("cl_gemm", ["-k", "-a"], ["gpu", "1024", "1"]),
    "ze": ("ze_gemm", ["-k", "-a"], ["1024", "1"]),
    "omp": ("omp_gemm", ["-k", "-a"], ["gpu", "1024", "1"]),
  }
  fallback = ("dpc_gemm", [option], ["gpu", "1024", "1"])
  sample, tool_args, app_args = launch_table.get(option, fallback)

  app_folder = utils.get_sample_build_path(sample)
  app_file = os.path.join(app_folder, sample)
  command = ["./oneprof"] + tool_args + [app_file] + app_args

  p = subprocess.Popen(command, cwd = path,
    stdout = subprocess.PIPE, stderr = subprocess.PIPE)
  stdout, stderr = utils.run_process(p)

  # Both streams are expected to carry output.
  if not stdout:
    return "stdout is empty"
  if not stderr:
    return "stderr is empty"
  # The sample must report a correct result and no warning may be emitted.
  if " CORRECT" not in stdout:
    return stdout
  if "WARNING" in stderr:
    return stderr
  return None
61+
62+
def main(option):
  """Build the required sample, configure and build oneprof, then run it.

  Each step returns a log on failure; the first non-empty log is returned
  to the caller. Returns None when every step succeeds.
  """
  path = utils.get_tool_build_path("oneprof")

  # option -> (sample test module, argument passed to its main())
  sample_table = {
    "cl": (cl_gemm, "gpu"),
    "ze": (ze_gemm, None),
    "omp": (omp_gemm, "gpu"),
  }
  sample, sample_arg = sample_table.get(option, (dpc_gemm, "gpu"))

  # Steps are evaluated lazily and in order; the first failure stops the chain.
  steps = (
    lambda: sample.main(sample_arg),
    lambda: config(path),
    lambda: build(path),
    lambda: run(path, option),
  )
  for step in steps:
    log = step()
    if log:
      return log
  return None
83+
84+
if __name__ == "__main__":
  # "-m" is the default; the first command line argument overrides it only
  # when it is one of the other recognized options.
  recognized = ("-i", "-k", "-a", "cl", "ze", "omp")
  option = "-m"
  if len(sys.argv) > 1 and sys.argv[1] in recognized:
    option = sys.argv[1]
  log = main(option)
  if log:
    print(log)

tools/cl_tracer/cl_tracer.h

+1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ class ClTracer {
3636
}
3737

3838
ClTracer* tracer = new ClTracer(options);
39+
PTI_ASSERT(tracer != nullptr);
3940

4041
if (tracer->CheckOption(TRACE_DEVICE_TIMING) ||
4142
tracer->CheckOption(TRACE_DEVICE_TIMING_VERBOSE) ||

tools/oneprof/CMakeLists.txt

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
include("../../build_utils/CMakeLists.txt")
SetRequiredCMakeVersion()
cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION})

project(PTI_Tools_OneProf CXX)
SetCompilerFlags()
SetBuildType()

# Tool Library
# Shared library built from the common loader init code, the correlator
# utility and the tool's own tool.cc.

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DPTI_KERNEL_INTERVALS=1")
add_library(oneprof_tool SHARED
  "${PROJECT_SOURCE_DIR}/../../loader/init.cc"
  "${PROJECT_SOURCE_DIR}/../utils/correlator.cc"
  tool.cc)
target_include_directories(oneprof_tool PRIVATE
  "${PROJECT_SOURCE_DIR}"
  "${PROJECT_SOURCE_DIR}/../ze_tracer"
  "${PROJECT_SOURCE_DIR}/../utils"
  "${PROJECT_SOURCE_DIR}/../../utils")
if(CMAKE_INCLUDE_PATH)
  target_include_directories(oneprof_tool PUBLIC
    "${CMAKE_INCLUDE_PATH}")
endif()

# OpenCL lookups are currently disabled for this target.
#FindOpenCLLibrary(oneprof_tool)
#FindOpenCLHeaders(oneprof_tool)

#GetOpenCLTracingHeaders(oneprof_tool)

FindL0Library(oneprof_tool)
FindL0Headers(oneprof_tool)

CheckForMDLibrary(oneprof_tool)
CheckForMetricsLibrary(oneprof_tool)

# Loader
# Executable built from the shared loader source; TOOL_NAME is set to the
# tool library target name defined above.

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=oneprof_tool")
add_executable(oneprof "${PROJECT_SOURCE_DIR}/../../loader/loader.cc")
target_include_directories(oneprof PRIVATE
  "${PROJECT_SOURCE_DIR}/../../utils")
if(UNIX)
  target_link_libraries(oneprof dl)
endif()

# Installation

install(TARGETS oneprof oneprof_tool DESTINATION bin)

0 commit comments

Comments
 (0)