Skip to content

Commit b4bc9de

Browse files
committed
Add 'vardir' disk utilization collecting
Added for 'vardir' path disk disk utilization collecting routine which parses file: /proc/diskstats for the value milliseconds spent doing I/Os from the given device and counts 'busy' value in percents, like iostat tool does for its '%util' field. Check for more information linux kernel documentation [1]. We use from the 'diskstats' file Field 10 which is in real at the 12th position: Field 9 -- # of I/Os currently in progress The only field that should go to zero. Incremented as requests are given to appropriate struct request_queue and decremented as they finish. Field 10 -- # of milliseconds spent doing I/Os This field increases so long as field 9 is nonzero. Decided to collect 'vardir' disk utilization per each run test in standalone file and print to stdout after testing completed, up to 10 most biggest of it. To count 'vardir' disk utilization for each test we need to know what was the value of the disk bound on the test start and time when test started to count later the utilization. On test finish we check the test finish time and current disk bound and count disk utilization. All of these checks and counts implemented as the new routine 'get_disk_bound_stat_busy()' in 'lib/utils.py' file. To collect disk bounds and durations on test start/finish events used 'StatisticsWatcher' listener which has the following usable routines for it: process_result() Using 'WorkerCurrentTask' queue message receive event to save the disk bounds values at the start of the tests. Using 'WorkerTaskResult' queue message receive event to save final disk bounds values for finished tests. print_statistics() - statistics printing to stdout after testing. Prints disk bounds for failed tests and up to 10 most used it tests. We use standalone 'statistics' directory in 'vardir' path to save 'durations.log' file in it with disk bounds for each tested task in format: <test task name> <disk bound> Needed for tarantool/tarantool-qa#97 [1]: https://www.kernel.org/doc/Documentation/iostats.txt
1 parent e0276d1 commit b4bc9de

File tree

2 files changed

+92
-0
lines changed

2 files changed

+92
-0
lines changed

lib/utils.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,55 @@ def get_proc_stat_rss(pid):
248248
return rss
249249

250250

251+
def get_vardir_device(vardir):
252+
dev = 'unknown'
253+
path = os.path.abspath(vardir)
254+
while not os.path.ismount(path):
255+
path = os.path.dirname(path)
256+
try:
257+
with open('/proc/mounts', 'r') as f:
258+
for line in f:
259+
if line.split()[1] == path:
260+
mount = line.split()[0]
261+
dev = mount.split('/')[2]
262+
except (OSError, IOError):
263+
pass
264+
return dev
265+
266+
267+
# This routine gets the value milliseconds spent doing I/Os
268+
# from the given 'vardir' device found at '/proc/diskstats'
269+
# and counts 'busy' value in percents like iostat tool does
270+
# for its '%util' field.
271+
#
272+
# Check for more information linux kernel documentation:
273+
# https://www.kernel.org/doc/Documentation/iostats.txt
274+
#
275+
# We use Field 10 which has the 12th position in the file:
276+
#
277+
# Field 9 -- # of I/Os currently in progress
278+
# The only field that should go to zero. Incremented as requests are
279+
# given to appropriate struct request_queue and decremented as they finish.
280+
#
281+
# Field 10 -- # of milliseconds spent doing I/Os
282+
# This field increases so long as field 9 is nonzero.
283+
def get_disk_bound_stat_busy(devname, previous):
284+
busy = 'unknown'
285+
times = time.time()
286+
try:
287+
with open('/proc/diskstats', 'r') as f:
288+
for line in f:
289+
if line.split()[2] == devname:
290+
value = line.split()[12]
291+
if previous[1] == 'unknown':
292+
previous[1] = value
293+
busy = 100 * (int(value) - int(previous[1])) / \
294+
(1024 * (times - previous[0]))
295+
except (OSError, IOError):
296+
pass
297+
return busy, [times, value]
298+
299+
251300
def set_fd_cloexec(socket):
252301
flags = fcntl.fcntl(socket, fcntl.F_GETFD)
253302
fcntl.fcntl(socket, fcntl.F_SETFD, flags | fcntl.FD_CLOEXEC)

listeners.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import sys
33
import yaml
44
import shutil
5+
import time
56

67
from lib import Options
78
from lib.colorer import color_stdout
@@ -16,6 +17,8 @@
1617
from lib.utils import safe_makedirs
1718
from lib.utils import print_tail_n
1819
from lib.utils import print_unidiff
20+
from lib.utils import get_vardir_device
21+
from lib.utils import get_disk_bound_stat_busy
1922

2023

2124
class BaseWatcher(object):
@@ -36,11 +39,20 @@ def __init__(self, get_logfile):
3639
self.stats = dict()
3740
self.rss_summary = sampler.rss_summary()
3841
self.tasks_durations = dict()
42+
self.vardir_devname = get_vardir_device(Options().args.vardir)
43+
self.vardir_bounds = dict()
44+
self.vardir_data = dict()
3945
self.failed_tasks = []
4046
self.get_logfile = get_logfile
4147
self.long_tasks = []
4248

4349
def process_result(self, obj):
50+
# Called only once on task run initiated.
51+
if isinstance(obj, WorkerCurrentTask):
52+
task_name = obj.task_name + ((':' + obj.task_param) if obj.task_param else '')
53+
self.vardir_bounds[task_name], self.vardir_data[task_name] = get_disk_bound_stat_busy(
54+
self.vardir_devname, [time.time(), 'unknown'])
55+
4456
if not isinstance(obj, WorkerTaskResult):
4557
return
4658

@@ -59,6 +71,8 @@ def process_result(self, obj):
5971
obj.show_reproduce_content))
6072

6173
self.tasks_durations[task_name] = obj.duration
74+
self.vardir_bounds[task_name], self.vardir_data[task_name] = get_disk_bound_stat_busy(
75+
self.vardir_devname, self.vardir_data[task_name])
6276

6377
def get_failed_task_name(self, task):
6478
return task[0][0] + ((':' + task[0][1]) if task[0][1] else '')
@@ -70,6 +84,7 @@ def print_statistics(self):
7084
field_size = 60
7185
top_rss = 10
7286
top_durations = 10
87+
top_bounds = 10
7388

7489
# Prepare standalone subpath '<vardir>/statistics' for statistics files.
7590
stats_dir = os.path.join(Options().args.vardir, 'statistics')
@@ -130,6 +145,34 @@ def print_statistics(self):
130145
fd.write("{} {}\n".format(task_name, self.tasks_durations[task_name]))
131146
fd.close()
132147

148+
# Disk usage.
149+
150+
# Print to stdout disk usage statistics for all failed tasks.
151+
if self.failed_tasks:
152+
color_stdout('Disk bounds of the failed tasks:\n', schema='test_var')
153+
for task in self.failed_tasks:
154+
task_name = self.get_failed_task_name(task)
155+
if task_name in self.vardir_bounds:
156+
color_stdout('* %3d %s %s\n' % (self.vardir_bounds[task_name],
157+
task_name.ljust(field_size), self.get_long_mark(task_name)),
158+
schema='test_var')
159+
160+
# Print to stdout disk bounds statistics for some number of most it used tasks.
161+
color_stdout('Up to {} most disk bounds used by tasks:\n'.format(top_bounds),
162+
schema='test_var')
163+
results_sorted = sorted(self.vardir_bounds.items(), key=lambda x: x[1], reverse=True)
164+
for task_name, bounds in results_sorted[:top_bounds]:
165+
color_stdout('* %3d %s %s\n' % (bounds, task_name.ljust(field_size),
166+
self.get_long_mark(task_name)), schema='test_var')
167+
color_stdout('-' * 41, "\n", schema='separator')
168+
169+
# Print disk bounds statistics to '<vardir>/statistics/diskbounds.log' file.
170+
filepath = os.path.join(stats_dir, 'diskbounds.log')
171+
fd = open(filepath, 'w')
172+
for task_name in self.vardir_bounds:
173+
fd.write("{} {}\n".format(task_name, self.vardir_bounds[task_name]))
174+
fd.close()
175+
133176
"""Returns are there failed tasks."""
134177
if self.stats:
135178
color_stdout('Statistics:\n', schema='test_var')

0 commit comments

Comments
 (0)