From 4edad84b626c7bde5775e34e70bda67585e2bfe0 Mon Sep 17 00:00:00 2001 From: "Alexander V. Tikhonov" Date: Thu, 6 May 2021 06:40:17 +0000 Subject: [PATCH] Add 'vardir' disk utilization collecting Added for 'vardir' path disk disk utilization collecting routine which parses file: /proc/diskstats for the value milliseconds spent doing I/Os from the given device and counts 'busy' value in percents, like iostat tool does for its '%util' field. Check for more information linux kernel documentation [1]. We use from the 'diskstats' file Field 10 which is in real at the 12th position: Field 9 -- # of I/Os currently in progress The only field that should go to zero. Incremented as requests are given to appropriate struct request_queue and decremented as they finish. Field 10 -- # of milliseconds spent doing I/Os This field increases so long as field 9 is nonzero. Decided to collect 'vardir' disk utilization per each run test in standalone file and print to stdout after testing completed, up to 10 most biggest of it. To count 'vardir' disk utilization for each test we need to know what was the value of the disk bound on the test start and time when test started to count later the utilization. On test finish we check the test finish time and current disk bound and count disk utilization. All of these checks and counts implemented as the new routine 'get_disk_bound_stat_busy()' in 'lib/utils.py' file. To collect disk bounds and durations on test start/finish events used 'StatisticsWatcher' listener which has the following usable routines for it: process_result() Using 'WorkerCurrentTask' queue message apperance to save the disk bounds values at the start of the tests. Using 'WorkerTaskResult' queue message appearance to save final disk bounds values for finished tests. print_statistics() - statistics printing to stdout after testing. Prints disk bounds for failed tests and up to 10 most used it tests. We use standalone 'statistics' directory in 'vardir' path to save 'durations.log' file in it with disk bounds for each tested task in format: Part of tarantool/tarantool-qa#97 Closes #308 [1]: https://www.kernel.org/doc/Documentation/iostats.txt --- lib/utils.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++ listeners.py | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+) diff --git a/lib/utils.py b/lib/utils.py index bdf23654..94b7fd42 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -256,6 +256,64 @@ def get_proc_stat_rss(pid): return rss +def proc_diskstats_supported(vardir): + return get_vardir_device(vardir) != 'unknown' and os.path.isfile('/proc/diskstats') + + +def get_vardir_device(vardir): + dev = 'unknown' + path = os.path.abspath(vardir) + while not os.path.ismount(path): + path = os.path.dirname(path) + try: + with open('/proc/mounts', 'r') as f: + for line in f: + if line.split()[1] == path: + mount = line.split()[0] + dev = mount.split('/')[2] + except (OSError, IOError, IndexError): + pass + return dev + + +# This routine gets the value milliseconds spent doing I/Os +# from the given 'vardir' device found at '/proc/diskstats' +# and counts 'busy' value in percents like iostat tool does +# for its '%util' field. +# +# Check for more information linux kernel documentation: +# https://www.kernel.org/doc/Documentation/iostats.txt +# +# We use Field 10 which has the 12th position in the file: +# +# Field 9 -- # of I/Os currently in progress +# The only field that should go to zero. Incremented as requests are +# given to appropriate struct request_queue and decremented as they finish. +# +# Field 10 -- # of milliseconds spent doing I/Os +# This field increases so long as field 9 is nonzero. +def get_disk_bound_stat_busy(devname, previous): + """Function options are: + devname - initialy got by get_vardir_device() + previous - array of 2 fields: + [0] - previous time value + [1] - previous milliseconds spent doing I/Os value + """ + busy = 0 + times = time.time() + value = 0 + try: + with open('/proc/diskstats', 'r') as f: + for line in f: + if line.split()[2] == devname: + value = line.split()[12] + busy = 100 * (int(value) - int(previous[1])) / \ + (1024 * (times - previous[0])) + except (OSError, IOError): + pass + return busy, [times, value] + + def set_fd_cloexec(socket): flags = fcntl.fcntl(socket, fcntl.F_GETFD) fcntl.fcntl(socket, fcntl.F_SETFD, flags | fcntl.FD_CLOEXEC) diff --git a/listeners.py b/listeners.py index 107e57e1..45bd1576 100644 --- a/listeners.py +++ b/listeners.py @@ -2,6 +2,7 @@ import sys import yaml import shutil +import time from lib import Options from lib.colorer import color_stdout @@ -16,6 +17,9 @@ from lib.utils import safe_makedirs from lib.utils import print_tail_n from lib.utils import print_unidiff +from lib.utils import proc_diskstats_supported +from lib.utils import get_vardir_device +from lib.utils import get_disk_bound_stat_busy class BaseWatcher(object): @@ -37,11 +41,23 @@ def __init__(self, get_logfile): self.field_size = 60 self._sampler = sampler self.duration_stats = dict() + self.vardir = Options().args.vardir + self.proc_diskstats_supported = proc_diskstats_supported(self.vardir) + if self.proc_diskstats_supported: + self.vardir_devname = get_vardir_device(self.vardir) + self.vardir_usage = dict() + self.vardir_data = dict() self.failed_tasks = [] self.get_logfile = get_logfile self.long_tasks = set() def process_result(self, obj): + # Called only once on task run initiated. + if self.proc_diskstats_supported and isinstance(obj, WorkerCurrentTask): + task_id = (obj.task_name, obj.task_param) + self.vardir_usage[task_id], self.vardir_data[task_id] = get_disk_bound_stat_busy( + self.vardir_devname, [time.time(), 0]) + if not isinstance(obj, WorkerTaskResult): return @@ -59,6 +75,10 @@ def process_result(self, obj): obj.show_reproduce_content)) self.duration_stats[obj.task_id] = obj.duration + if self.proc_diskstats_supported: + self.vardir_usage[obj.task_id], self.vardir_data[obj.task_id] = \ + get_disk_bound_stat_busy(self.vardir_devname, + self.vardir_data[obj.task_id]) def get_long_mark(self, task): return '(long)' if task in self.long_tasks else '' @@ -145,6 +165,42 @@ def print_duration(self, stats_dir): self.duration_stats[task_id])) fd.close() + # Disk usage. + def print_disk_usage_summary(self, stats_dir): + if not self.proc_diskstats_supported: + return + + top_usage = 10 + + # Print to stdout disk usage statistics for all failed tasks. + if self.failed_tasks: + color_stdout('Disk usage of failed tests:\n', schema='info') + for task in self.failed_tasks: + task_id = task[0] + if task_id in self.vardir_usage: + color_stdout('* %3d %s %s\n' % (self.vardir_usage[task_id], + self.prettify_task_name(task_id).ljust(self.field_size), + self.get_long_mark(task_id)), + schema='info') + + # Print to stdout disk usage statistics for some number of most it used tasks. + color_stdout('Top {} tests by disk usage:\n'.format(top_usage), schema='info') + results_sorted = sorted(self.vardir_usage.items(), key=lambda x: x[1], reverse=True) + for task_id, usage in results_sorted[:top_usage]: + color_stdout('* %3d %s %s\n' % (usage, + self.prettify_task_name(task_id).ljust(self.field_size), + self.get_long_mark(task_id)), schema='info') + + color_stdout('-' * 81, "\n", schema='separator') + + # Print disk usage statistics to '/statistics/diskusage.log' file. + filepath = os.path.join(stats_dir, 'diskusage.log') + fd = open(filepath, 'w') + for task_id in self.vardir_usage: + fd.write("{} {}\n".format(self.prettify_task_name(task_id), + self.vardir_usage[task_id])) + fd.close() + def print_statistics(self): """Print statistics and results of testing.""" # Prepare standalone subpath '/statistics' for statistics files. @@ -153,6 +209,7 @@ def print_statistics(self): self.print_rss_summary(stats_dir) self.print_duration(stats_dir) + self.print_disk_usage_summary(stats_dir) if self.stats: color_stdout('Statistics:\n', schema='test_var')