From patchwork Fri Apr 30 21:20:31 2010
X-Patchwork-Submitter: Lucas Meneghel Rodrigues
X-Patchwork-Id: 96070
From: Lucas Meneghel Rodrigues
To: autotest@test.kernel.org
Cc: kvm@vger.kernel.org, Lucas Meneghel Rodrigues
Subject: [PATCH 1/2] IOzone test: Introduce postprocessing module
Date: Fri, 30 Apr 2010 18:20:31 -0300
Message-Id: <1272662432-27875-1-git-send-email-lmr@redhat.com>
X-Mailing-List: kvm@vger.kernel.org

diff --git a/client/tests/iozone/postprocessing.py b/client/tests/iozone/postprocessing.py
new file mode 100755
index 0000000..b495502
--- /dev/null
+++ b/client/tests/iozone/postprocessing.py
@@ -0,0 +1,487 @@
+#!/usr/bin/python
+"""
+Postprocessing module for IOzone. It picks up results from an IOzone run,
+calculates the geometric mean of all throughput results for a given file
+size or record size, and then generates a series of 2D and 3D graphs. The
+graph generation functionality depends on gnuplot; if gnuplot is not
+present, the module degrades gracefully.
+
+@copyright: Red Hat 2010
+"""
+import os, sys, optparse, logging, math, time
+import common
+from autotest_lib.client.common_lib import logging_config, logging_manager
+from autotest_lib.client.common_lib import error
+from autotest_lib.client.bin import utils, os_dep
+
+
+_LABELS = ('file_size', 'record_size', 'write', 'rewrite', 'read', 'reread',
+           'randread', 'randwrite', 'bkwdread', 'recordrewrite', 'strideread',
+           'fwrite', 'frewrite', 'fread', 'freread')
+
+
+def unique(list):
+    """
+    Return a list of the elements in list, but without duplicates.
+
+    @param list: List with values.
+    @return: List with non duplicate elements.
+    """
+    n = len(list)
+    if n == 0:
+        return []
+    u = {}
+    try:
+        for x in list:
+            u[x] = 1
+    except TypeError:
+        return None
+    else:
+        return u.keys()
+
+
+def geometric_mean(values):
+    """
+    Evaluates the geometric mean for a list of numeric values.
+
+    @param values: List with values.
+    @return: Single value representing the geometric mean for the list values.
+    @see: http://en.wikipedia.org/wiki/Geometric_mean
+    """
+    try:
+        values = [int(value) for value in values]
+    except ValueError:
+        return None
+    n = len(values)
+    if n == 0:
+        return None
+    return math.exp(sum([math.log(x) for x in values]) / n)
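+
+
+# Worked example for geometric_mean(): geometric_mean([4, 9]) == 6.0, since
+# exp((log(4) + log(9)) / 2) == exp(log(36) / 2) == sqrt(36). Summing logs
+# instead of multiplying avoids float overflow for long throughput lists.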
+
+
+def compare_matrices(matrix1, matrix2, treshold=0.05):
+    """
+    Compare 2 matrices (n x m) and return a matrix (n x m) with comparison data.
+
+    @param matrix1: Reference matrix with numeric data.
+    @param matrix2: Matrix that will be compared.
+    @param treshold: Any difference bigger than this percent threshold will be
+            reported.
+    """
+    improvements = 0
+    regressions = 0
+    same = 0
+
+    new_matrix = []
+    for line1, line2 in zip(matrix1, matrix2):
+        new_line = []
+        for element1, element2 in zip(line1, line2):
+            ratio = float(element2) / float(element1)
+            if ratio < (1 - treshold):
+                regressions += 1
+                new_line.append(100 * ratio - 100)
+            elif ratio > (1 + treshold):
+                improvements += 1
+                new_line.append("+" + str(100 * ratio - 100))
+            else:
+                same += 1
+                if line1.index(element1) == 0:
+                    new_line.append(element1)
+                else:
+                    new_line.append(".")
+        new_matrix.append(new_line)
+
+    total = improvements + regressions + same
+
+    return (new_matrix, improvements, regressions, total)
+
+
+class IOzoneAnalyzer(object):
+    """
+    Analyze an unprocessed IOzone file and generate the following types of
+    report:
+
+    * Summary of throughput for all file and record sizes combined
+    * Summary of throughput for all file sizes
+    * Summary of throughput for all record sizes
+
+    If more than one file is provided to the analyzer object, a comparison
+    between the two runs is made, searching for regressions in performance.
+    """
+    def __init__(self, list_files, output_dir):
+        self.list_files = list_files
+        if not os.path.isdir(output_dir):
+            os.makedirs(output_dir)
+        self.output_dir = output_dir
+        logging.info("Results will be stored in %s", output_dir)
+
+
+    def average_performance(self, results, size=None):
+        """
+        Reduces a list of performance result lines to a single averaged line.
+
+        @param results: List of n lists containing data from performance runs.
+        @param size: Numerical value of a size (say, file_size) that was used
+                to filter the original results list.
+        @return: Single list with the geometric mean of each throughput
+                column, converted from KB/s to MB/s, optionally prefixed by
+                the size value.
+        """
+        average_line = []
+        if size is not None:
+            average_line.append(size)
+        for i in range(2, 15):
+            average = geometric_mean([line[i] for line in results]) / 1024.0
+            average = int(average)
+            average_line.append(average)
+        return average_line
+
+
+    def process_results(self, results, label=None):
+        """
+        Process a list of IOzone results according to label.
+
+        @param results: A list of n x m columns with original IOzone results.
+        @param label: IOzone column label used to filter and compute the
+                geometric mean results; in practical terms, either 'file_size'
+                or 'record_size'. If None, all lines are averaged together.
+        @return: A list of lines with geometric averages for each value of the
+                given label (ex, the average for each file_size).
+        """
+        performance = []
+        if label is not None:
+            index = _LABELS.index(label)
+            sizes = unique([line[index] for line in results])
+            sizes.sort()
+            for size in sizes:
+                r_results = [line for line in results if line[index] == size]
+                performance.append(self.average_performance(r_results, size))
+        else:
+            performance.append(self.average_performance(results))
+
+        return performance
+
+
+    def parse_file(self, file):
+        """
+        Parse an IOzone results file.
+
+        @param file: File object that will be parsed.
+        @return: Matrix containing IOzone results extracted from the file.
+        """
+        lines = []
+        for line in file.readlines():
+            fields = line.split()
+            if len(fields) != 15:
+                continue
+            try:
+                lines.append([int(i) for i in fields])
+            except ValueError:
+                continue
+        return lines
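+
+
+    # Each line kept by parse_file() has the 15 numeric columns listed in
+    # _LABELS: file size (KB), record size (KB), and 13 throughput values in
+    # KB/s. report() below prints the processed tables and also writes them
+    # to '2d-datasource-file' and '2d-datasource-record' in the output
+    # directory, which plot_2d_graphs() later feeds to gnuplot.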
+
+
+    def report(self, overall_results, record_size_results, file_size_results):
+        """
+        Generates analysis data for an IOzone run.
+
+        Generates a report to both logs (where it goes with nice headers) and
+        output files for further processing (graph generation).
+
+        @param overall_results: 1x13 matrix with the averaged throughput for
+                all file and record sizes combined.
+        @param record_size_results: nx14 matrix with averaged results for
+                each record size tested.
+        @param file_size_results: nx14 matrix with averaged results for each
+                file size tested.
+        """
+        # Here we'll use the logging system to put the output of our analysis
+        # to files
+        logger = logging.getLogger()
+        formatter = logging.Formatter("")
+
+        logging.info("")
+        logging.info("TABLE:  SUMMARY of ALL FILE and RECORD SIZES        Results in MB/sec")
+        logging.info("")
+        logging.info("FILE & RECORD  INIT    RE              RE      RANDOM  RANDOM  BACKWD  RECRE   STRIDE  F       FRE     F       FRE")
+        logging.info("SIZES (KB)     WRITE   WRITE   READ    READ    READ    WRITE   READ    WRITE   READ    WRITE   WRITE   READ    READ")
+        logging.info("-------------------------------------------------------------------------------------------------------------------")
+        for result_line in overall_results:
+            logging.info("ALL            %-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s" % tuple(result_line))
+        logging.info("")
+
+        logging.info("DRILLED DATA:")
+
+        logging.info("")
+        logging.info("TABLE:  RECORD Size against all FILE Sizes          Results in MB/sec")
+        logging.info("")
+        logging.info("RECORD    INIT    RE              RE      RANDOM  RANDOM  BACKWD  RECRE   STRIDE  F       FRE     F       FRE")
+        logging.info("SIZE (KB) WRITE   WRITE   READ    READ    READ    WRITE   READ    WRITE   READ    WRITE   WRITE   READ    READ")
+        logging.info("--------------------------------------------------------------------------------------------------------------")
+
+        foutput_path = os.path.join(self.output_dir, '2d-datasource-file')
+        if os.path.isfile(foutput_path):
+            os.unlink(foutput_path)
+        foutput = logging.FileHandler(foutput_path)
+        foutput.setFormatter(formatter)
+        logger.addHandler(foutput)
+        for result_line in record_size_results:
+            logging.info("%-10s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s" % tuple(result_line))
+        logger.removeHandler(foutput)
+
+        logging.info("")
+
+        logging.info("")
+        logging.info("TABLE:  FILE Size against all RECORD Sizes          Results in MB/sec")
+        logging.info("")
+        logging.info("FILE      INIT    RE              RE      RANDOM  RANDOM  BACKWD  RECRE   STRIDE  F       FRE     F       FRE")
+        logging.info("SIZE (KB) WRITE   WRITE   READ    READ    READ    WRITE   READ    WRITE   READ    WRITE   WRITE   READ    READ")
+        logging.info("--------------------------------------------------------------------------------------------------------------")
+
+        routput_path = os.path.join(self.output_dir, '2d-datasource-record')
+        if os.path.isfile(routput_path):
+            os.unlink(routput_path)
+        routput = logging.FileHandler(routput_path)
+        routput.setFormatter(formatter)
+        logger.addHandler(routput)
+        for result_line in file_size_results:
+            logging.info("%-10s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s" % tuple(result_line))
+        logger.removeHandler(routput)
+
+        logging.info("")
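+
+
+    # In the comparison tables below, each cell is the percent difference
+    # between the second and the first run: regressions show up as negative
+    # numbers, improvements carry a '+' prefix, and cells within the 5%
+    # threshold are printed as '.' (see compare_matrices() above).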
+
+
+    def report_comparison(self, record, file):
+        """
+        Generates comparison data for 2 IOzone runs.
+
+        It compares 2 sets of nxm results and outputs a table with the
+        differences. Any difference bigger than the comparison threshold
+        (5% by default) is counted as a regression or an improvement.
+
+        @param record: Tuple with 4 elements containing results for record size.
+        @param file: Tuple with 4 elements containing results for file size.
+        """
+        (record_size, record_improvements, record_regressions,
+         record_total) = record
+        (file_size, file_improvements, file_regressions,
+         file_total) = file
+        logging.info("ANALYSIS of DRILLED DATA:")
+
+        logging.info("")
+        logging.info("TABLE:  RECsize Difference between runs             Results are % DIFF")
+        logging.info("")
+        logging.info("RECORD    INIT    RE              RE      RANDOM  RANDOM  BACKWD  RECRE   STRIDE  F       FRE     F       FRE")
+        logging.info("SIZE (KB) WRITE   WRITE   READ    READ    READ    WRITE   READ    WRITE   READ    WRITE   WRITE   READ    READ")
+        logging.info("--------------------------------------------------------------------------------------------------------------")
+        for result_line in record_size:
+            logging.info("%-10s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s" % tuple(result_line))
+        logging.info("REGRESSIONS: %d (%.2f%%)    Improvements: %d (%.2f%%)",
+                     record_regressions,
+                     (100 * record_regressions / float(record_total)),
+                     record_improvements,
+                     (100 * record_improvements / float(record_total)))
+        logging.info("")
+
+        logging.info("")
+        logging.info("TABLE:  FILEsize Difference between runs            Results are % DIFF")
+        logging.info("")
+        logging.info("FILE      INIT    RE              RE      RANDOM  RANDOM  BACKWD  RECRE   STRIDE  F       FRE     F       FRE")
+        logging.info("SIZE (KB) WRITE   WRITE   READ    READ    READ    WRITE   READ    WRITE   READ    WRITE   WRITE   READ    READ")
+        logging.info("--------------------------------------------------------------------------------------------------------------")
+        for result_line in file_size:
+            logging.info("%-10s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s" % tuple(result_line))
+        logging.info("REGRESSIONS: %d (%.2f%%)    Improvements: %d (%.2f%%)",
+                     file_regressions,
+                     (100 * file_regressions / float(file_total)),
+                     file_improvements,
+                     (100 * file_improvements / float(file_total)))
+        logging.info("")
+
+
+    def analyze(self):
+        """
+        Analyzes, and optionally compares, sets of IOzone data.
+        """
+        overall = []
+        record_size = []
+        file_size = []
+        for path in self.list_files:
+            file = open(path, 'r')
+            logging.info('FILE: %s', path)
+
+            results = self.parse_file(file)
+
+            overall_results = self.process_results(results)
+            record_size_results = self.process_results(results, 'record_size')
+            file_size_results = self.process_results(results, 'file_size')
+            self.report(overall_results, record_size_results, file_size_results)
+
+            if len(self.list_files) == 2:
+                overall.append(overall_results)
+                record_size.append(record_size_results)
+                file_size.append(file_size_results)
+
+        if len(self.list_files) == 2:
+            record_comparison = compare_matrices(*record_size)
+            file_comparison = compare_matrices(*file_size)
+            self.report_comparison(record_comparison, file_comparison)
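+
+
+# The __main__ block at the end of this file wires the two classes together:
+# IOzoneAnalyzer consumes one or two raw result files and writes the 2D data
+# sources, then IOzonePlotter turns the first result file and those data
+# sources into PNG graphs under an 'iozone-graphs-<timestamp>' directory.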
+ """ + def __init__(self, results_file, output_dir): + self.active = True + try: + self.gnuplot = os_dep.command("gnuplot") + except: + logging.error("Command gnuplot not found, disabling graph " + "generation") + self.active = False + + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + self.output_dir = output_dir + + if not os.path.isfile(results_file): + logging.error("Invalid file %s provided, disabling graph " + "generation", results_file) + self.active = False + self.results_file = None + else: + self.results_file = results_file + self.generate_data_source() + + + def generate_data_source(self): + """ + Creates data file without headers for gnuplot consumption. + """ + results_file = open(self.results_file, 'r') + self.datasource = os.path.join(self.output_dir, '3d-datasource') + datasource = open(self.datasource, 'w') + for line in results_file.readlines(): + fields = line.split() + if len(fields) != 15: + continue + try: + values = [int(i) for i in fields] + datasource.write(line) + except ValueError: + continue + datasource.close() + + + def plot_2d_graphs(self): + """ + For each one of the throughput parameters, generate a set of gnuplot + commands that will create a parametric surface with file size vs. + record size vs. throughput. + """ + datasource_2d = os.path.join(self.output_dir, '2d-datasource-file') + for index, label in zip(range(1, 14), _LABELS[2:]): + commands_path = os.path.join(self.output_dir, '2d-%s.do' % label) + commands = "" + commands += "set title 'Iozone performance: %s'\n" % label + commands += "set logscale x\n" + commands += "set xlabel 'File size (KB)'\n" + commands += "set ylabel 'Througput (MB/s)'\n" + commands += "set terminal png small size 450 350\n" + commands += "set output '%s'\n" % os.path.join(self.output_dir, + '2d-%s.png' % label) + commands += ("plot '%s' using 1:%s title '%s' with lines \n" % + (datasource_2d, index, label)) + commands_file = open(commands_path, 'w') + commands_file.write(commands) + commands_file.close() + try: + utils.run("%s %s" % (self.gnuplot, commands_path)) + except error.CmdError, e: + logging.error("Problem plotting from commands file %s: %s", + commands_file, str(e)) + + + def plot_3d_graphs(self): + """ + For each one of the throughput parameters, generate a set of gnuplot + commands that will create a parametric surface with file size vs. + record size vs. throughput. 
+ """ + for index, label in zip(range(1, 14), _LABELS[2:]): + commands_path = os.path.join(self.output_dir, '%s.do' % label) + commands = "" + commands += "set title 'Iozone performance: %s'\n" % label + commands += "set grid lt 2 lw 1\n" + commands += "set surface\n" + commands += "set parametric\n" + commands += "set xtics\n" + commands += "set ytics\n" + commands += "set logscale x 2\n" + commands += "set logscale y 2\n" + commands += "set logscale z\n" + commands += "set xrange [2.**5:2.**24]\n" + commands += "set xlabel 'File size (KB)'\n" + commands += "set ylabel 'Record size (KB)'\n" + commands += "set zlabel 'Througput (KB/s)'\n" + commands += "set data style lines\n" + commands += "set dgrid3d 80,80, 3\n" + commands += "set terminal png small size 900 700\n" + commands += "set output '%s'\n" % os.path.join(self.output_dir, + '%s.png' % label) + commands += ("splot '%s' using 1:2:%s title '%s'\n" % + (self.datasource, index, label)) + commands_file = open(commands_path, 'w') + commands_file.write(commands) + commands_file.close() + try: + utils.run("%s %s" % (self.gnuplot, commands_path)) + except error.CmdError, e: + logging.error("Problem plotting from commands file %s: %s", + commands_file, str(e)) + + + def plot_all(self): + """ + Plot all graphs that are to be plotted, provided that we have gnuplot. + """ + if self.active: + self.plot_2d_graphs() + self.plot_3d_graphs() + + +class AnalyzerLoggingConfig(logging_config.LoggingConfig): + def configure_logging(self, results_dir=None, verbose=False): + super(AnalyzerLoggingConfig, self).configure_logging(use_console=True, + verbose=verbose) + + +if __name__ == "__main__": + parser = optparse.OptionParser("usage: %prog [options] [filenames]") + options, args = parser.parse_args() + + logging_manager.configure_logging(AnalyzerLoggingConfig()) + + if args: + filenames = args + else: + parser.print_help() + sys.exit(1) + + if len(args) > 2: + parser.print_help() + sys.exit(1) + + o = os.path.join(os.getcwd(), + "iozone-graphs-%s" % time.strftime('%Y-%m-%d-%H.%M.%S')) + if not os.path.isdir(o): + os.makedirs(o) + + a = IOzoneAnalyzer(list_files=filenames, output_dir=o) + a.analyze() + p = IOzonePlotter(results_file=filenames[0], output_dir=o) + p.plot_all()