#!/usr/bin/python3
# Copyright (C) 2015-2022 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <https://www.gnu.org/licenses/>.
"""Compare two benchmark results.

Given two benchmark result files and a threshold, this script compares the
benchmark results and flags differences in performance beyond the given
threshold.
"""
import argparse
import os
import sys

import pylab

import import_bench as bench


def do_compare(func, var, tl1, tl2, par, threshold):
    """Compare one of the aggregate measurements.

    Helper function to compare one of the aggregate measurements of a
    function variant.

    Args:
        func: Function name.
        var: Function variant name.
        tl1: The first timings list.
        tl2: The second timings list.
        par: The aggregate to compare.
        threshold: The threshold for differences, beyond which the script
            prints a warning.
    """
    try:
        v1 = tl1[str(par)]
        v2 = tl2[str(par)]
        d = abs(v2 - v1) * 100 / v1
    except KeyError:
        sys.stderr.write('%s(%s)[%s]: stat does not exist\n'
                         % (func, var, par))
        return
    except ZeroDivisionError:
        return

    if d > threshold:
        # '+++' marks an improvement (the first run was slower), '---' a
        # regression.
        if v1 > v2:
            ind = '+++'
        else:
            ind = '---'
        print('%s %s(%s)[%s]: (%.2lf%%) from %g to %g'
              % (ind, func, var, par, d, v1, v2))


def compare_runs(pts1, pts2, threshold, stats):
    """Compare two benchmark runs.

    Args:
        pts1: Timing data from the first machine.
        pts2: Timing data from the second machine.
        threshold: Percentage difference beyond which a change is reported.
        stats: Space-separated list of aggregate statistics to compare.
    """
    # XXX We assume that the two benchmarks have identical functions and
    # variants.  We cannot compare two benchmarks that may have different
    # functions or variants.  Maybe that is something for the future.
    for func in pts1['functions'].keys():
        for var in pts1['functions'][func].keys():
            tl1 = pts1['functions'][func][var]
            tl2 = pts2['functions'][func][var]

            # Compare the consolidated numbers.
            # do_compare(func, var, tl1, tl2, 'max', threshold)
            for stat in stats.split():
                do_compare(func, var, tl1, tl2, stat, threshold)

            # Skip over to the next variant or function if there is no
            # detailed timing info for the function variant.
            if ('timings' not in pts1['functions'][func][var].keys()
                    or 'timings' not in pts2['functions'][func][var].keys()):
                continue

            # If the two lists do not have the same length then it is likely
            # that the performance characteristics of the function have
            # changed.
            # XXX: It is also possible that some measurement strayed outside
            # the usual range.  Such outliers should not happen on an idle
            # machine with identical hardware and configuration, but ideal
            # environments are hard to come by.
            if len(tl1['timings']) != len(tl2['timings']):
                print('* %s(%s): Timing characteristics changed' %
                      (func, var))
                print('\tBefore: [%s]' %
                      ', '.join([str(x) for x in tl1['timings']]))
                print('\tAfter: [%s]' %
                      ', '.join([str(x) for x in tl2['timings']]))
                continue

            # Collect pairs of timings whose difference crosses the
            # threshold we have set.
            issues = [(x, y) for x, y in zip(tl1['timings'], tl2['timings'])
                      if abs(y - x) * 100 / x > threshold]

            # Now print them.  '+' marks an improvement, '-' a regression.
            for t1, t2 in issues:
                d = abs(t2 - t1) * 100 / t1
                if t2 > t1:
                    ind = '-'
                else:
                    ind = '+'

                print('%s %s(%s): (%.2lf%%) from %g to %g'
                      % (ind, func, var, d, t1, t2))


def plot_graphs(bench1, bench2):
    """Plot graphs for functions.

    Make scatter plots for the functions and their variants.

    Args:
        bench1: Set of points from the first machine.
        bench2: Set of points from the second machine.
    """
    for func in bench1['functions'].keys():
        for var in bench1['functions'][func].keys():
            # No point trying to plot a graph if there are no detailed
            # timings.
            if 'timings' not in bench1['functions'][func][var].keys():
                sys.stderr.write('Skipping graph for %s(%s)\n' % (func, var))
                continue

            pylab.clf()
            pylab.ylabel('Time (cycles)')

            # First set of points, in red.
            length = len(bench1['functions'][func][var]['timings'])
            X = [float(x) for x in range(length)]
            lines = pylab.scatter(X,
                                  bench1['functions'][func][var]['timings'],
                                  1.5 + 100 / length)
            pylab.setp(lines, 'color', 'r')

            # Second set of points, in green.
            length = len(bench2['functions'][func][var]['timings'])
            X = [float(x) for x in range(length)]
            lines = pylab.scatter(X,
                                  bench2['functions'][func][var]['timings'],
                                  1.5 + 100 / length)
            pylab.setp(lines, 'color', 'g')

            if var:
                filename = '%s-%s.png' % (func, var)
            else:
                filename = '%s.png' % func
            sys.stderr.write('Writing out %s\n' % filename)
            pylab.savefig(filename)


def main(bench1, bench2, schema, threshold, stats):
    """Parse both benchmark files, plot their timings and compare the runs."""
    bench1 = bench.parse_bench(bench1, schema)
    bench.do_for_all_timings(bench1, lambda b, f, v:
                             b['functions'][f][v]['timings'].sort())
    bench2 = bench.parse_bench(bench2, schema)
    bench.do_for_all_timings(bench2, lambda b, f, v:
                             b['functions'][f][v]['timings'].sort())

    plot_graphs(bench1, bench2)

    bench.compress_timings(bench1)
    bench.compress_timings(bench2)

    compare_runs(bench1, bench2, threshold, stats)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
            description='Take two benchmarks and compare their timings.')

    # Required parameters.
    parser.add_argument('bench1', help='First benchmark to compare')
    parser.add_argument('bench2', help='Second benchmark to compare')

    # Optional parameters.
    parser.add_argument('--schema',
                        default=os.path.join(
                            os.path.dirname(os.path.realpath(__file__)),
                            'benchout.schema.json'),
                        help='JSON schema to validate the input files '
                             '(default: %(default)s)')
    parser.add_argument('--threshold', default=10.0, type=float,
                        help='Only print differences at or above this '
                             'percentage (default: %(default)s)')
    parser.add_argument('--stats', default='min mean', type=str,
                        help='Only consider the statistics specified as a '
                             'space-separated list (default: %(default)s)')

    args = parser.parse_args()

    main(args.bench1, args.bench2, args.schema, args.threshold, args.stats)
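# Example session (a sketch only: the file names and numbers below are
# hypothetical, but each output line follows one of the formats printed by
# do_compare and compare_runs above; plot_graphs additionally writes one
# PNG scatter plot per function variant):
#
#   $ ./compare_bench.py bench-old.out bench-new.out --threshold 5.0 \
#         --stats 'min mean'
#   --- memcpy(length=32)[mean]: (7.40%) from 39.2 to 42.1
#   +++ memset(length=64)[min]: (6.12%) from 19.6 to 18.4
#   * strlen(length=128): Timing characteristics changed
#       Before: [290, 291, 295]
#       After: [288, 290, 296, 355]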