1#!/usr/bin/python 2# Copyright (C) 2015-2022 Free Software Foundation, Inc. 3# This file is part of the GNU C Library. 4# 5# The GNU C Library is free software; you can redistribute it and/or 6# modify it under the terms of the GNU Lesser General Public 7# License as published by the Free Software Foundation; either 8# version 2.1 of the License, or (at your option) any later version. 9# 10# The GNU C Library is distributed in the hope that it will be useful, 11# but WITHOUT ANY WARRANTY; without even the implied warranty of 12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13# Lesser General Public License for more details. 14# 15# You should have received a copy of the GNU Lesser General Public 16# License along with the GNU C Library; if not, see 17# <https://www.gnu.org/licenses/>. 18"""Functions to import benchmark data and process it""" 19 20import json 21try: 22 import jsonschema as validator 23except ImportError: 24 print('Could not find jsonschema module.') 25 raise 26 27 28def mean(lst): 29 """Compute and return mean of numbers in a list 30 31 The numpy average function has horrible performance, so implement our 32 own mean function. 33 34 Args: 35 lst: The list of numbers to average. 36 Return: 37 The mean of members in the list. 38 """ 39 return sum(lst) / len(lst) 40 41 42def split_list(bench, func, var): 43 """ Split the list into a smaller set of more distinct points 44 45 Group together points such that the difference between the smallest 46 point and the mean is less than 1/3rd of the mean. This means that 47 the mean is at most 1.5x the smallest member of that group. 48 49 mean - xmin < mean / 3 50 i.e. 2 * mean / 3 < xmin 51 i.e. mean < 3 * xmin / 2 52 53 For an evenly distributed group, the largest member will be less than 54 twice the smallest member of the group. 55 Derivation: 56 57 An evenly distributed series would be xmin, xmin + d, xmin + 2d... 58 59 mean = (2 * n * xmin + n * (n - 1) * d) / 2 * n 60 and max element is xmin + (n - 1) * d 61 62 Now, mean < 3 * xmin / 2 63 64 3 * xmin > 2 * mean 65 3 * xmin > (2 * n * xmin + n * (n - 1) * d) / n 66 3 * n * xmin > 2 * n * xmin + n * (n - 1) * d 67 n * xmin > n * (n - 1) * d 68 xmin > (n - 1) * d 69 2 * xmin > xmin + (n-1) * d 70 2 * xmin > xmax 71 72 Hence, proved. 73 74 Similarly, it is trivial to prove that for a similar aggregation by using 75 the maximum element, the maximum element in the group must be at most 4/3 76 times the mean. 77 78 Args: 79 bench: The benchmark object 80 func: The function name 81 var: The function variant name 82 """ 83 means = [] 84 lst = bench['functions'][func][var]['timings'] 85 last = len(lst) - 1 86 while lst: 87 for i in range(last + 1): 88 avg = mean(lst[i:]) 89 if avg > 0.75 * lst[last]: 90 means.insert(0, avg) 91 lst = lst[:i] 92 last = i - 1 93 break 94 bench['functions'][func][var]['timings'] = means 95 96 97def do_for_all_timings(bench, callback): 98 """Call a function for all timing objects for each function and its 99 variants. 100 101 Args: 102 bench: The benchmark object 103 callback: The callback function 104 """ 105 for func in bench['functions'].keys(): 106 for k in bench['functions'][func].keys(): 107 if 'timings' not in bench['functions'][func][k].keys(): 108 continue 109 110 callback(bench, func, k) 111 112 113def compress_timings(points): 114 """Club points with close enough values into a single mean value 115 116 See split_list for details on how the clubbing is done. 117 118 Args: 119 points: The set of points. 120 """ 121 do_for_all_timings(points, split_list) 122 123 124def parse_bench(filename, schema_filename): 125 """Parse the input file 126 127 Parse and validate the json file containing the benchmark outputs. Return 128 the resulting object. 129 Args: 130 filename: Name of the benchmark output file. 131 Return: 132 The bench dictionary. 133 """ 134 with open(schema_filename, 'r') as schemafile: 135 schema = json.load(schemafile) 136 with open(filename, 'r') as benchfile: 137 bench = json.load(benchfile) 138 validator.validate(bench, schema) 139 return bench 140