1#!/usr/bin/python
2# Copyright (C) 2015-2022 Free Software Foundation, Inc.
3# This file is part of the GNU C Library.
4#
5# The GNU C Library is free software; you can redistribute it and/or
6# modify it under the terms of the GNU Lesser General Public
7# License as published by the Free Software Foundation; either
8# version 2.1 of the License, or (at your option) any later version.
9#
10# The GNU C Library is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13# Lesser General Public License for more details.
14#
15# You should have received a copy of the GNU Lesser General Public
16# License along with the GNU C Library; if not, see
17# <https://www.gnu.org/licenses/>.
18"""Functions to import benchmark data and process it"""
19
20import json
21try:
22    import jsonschema as validator
23except ImportError:
24    print('Could not find jsonschema module.')
25    raise
26
27
28def mean(lst):
29    """Compute and return mean of numbers in a list
30
31    The numpy average function has horrible performance, so implement our
32    own mean function.
33
34    Args:
35        lst: The list of numbers to average.
36    Return:
37        The mean of members in the list.
38    """
39    return sum(lst) / len(lst)
40
41
42def split_list(bench, func, var):
43    """ Split the list into a smaller set of more distinct points
44
45    Group together points such that the difference between the smallest
46    point and the mean is less than 1/3rd of the mean.  This means that
47    the mean is at most 1.5x the smallest member of that group.
48
49    mean - xmin < mean / 3
50    i.e. 2 * mean / 3 < xmin
51    i.e. mean < 3 * xmin / 2
52
53    For an evenly distributed group, the largest member will be less than
54    twice the smallest member of the group.
55    Derivation:
56
57    An evenly distributed series would be xmin, xmin + d, xmin + 2d...
58
59    mean = (2 * n * xmin + n * (n - 1) * d) / 2 * n
60    and max element is xmin + (n - 1) * d
61
62    Now, mean < 3 * xmin / 2
63
64    3 * xmin > 2 * mean
65    3 * xmin > (2 * n * xmin + n * (n - 1) * d) / n
66    3 * n * xmin > 2 * n * xmin + n * (n - 1) * d
67    n * xmin > n * (n - 1) * d
68    xmin > (n - 1) * d
69    2 * xmin > xmin + (n-1) * d
70    2 * xmin > xmax
71
72    Hence, proved.
73
74    Similarly, it is trivial to prove that for a similar aggregation by using
75    the maximum element, the maximum element in the group must be at most 4/3
76    times the mean.
77
78    Args:
79        bench: The benchmark object
80        func: The function name
81        var: The function variant name
82    """
83    means = []
84    lst = bench['functions'][func][var]['timings']
85    last = len(lst) - 1
86    while lst:
87        for i in range(last + 1):
88            avg = mean(lst[i:])
89            if avg > 0.75 * lst[last]:
90                means.insert(0, avg)
91                lst = lst[:i]
92                last = i - 1
93                break
94    bench['functions'][func][var]['timings'] = means
95
96
97def do_for_all_timings(bench, callback):
98    """Call a function for all timing objects for each function and its
99    variants.
100
101    Args:
102        bench: The benchmark object
103        callback: The callback function
104    """
105    for func in bench['functions'].keys():
106        for k in bench['functions'][func].keys():
107            if 'timings' not in bench['functions'][func][k].keys():
108                continue
109
110            callback(bench, func, k)
111
112
113def compress_timings(points):
114    """Club points with close enough values into a single mean value
115
116    See split_list for details on how the clubbing is done.
117
118    Args:
119        points: The set of points.
120    """
121    do_for_all_timings(points, split_list)
122
123
124def parse_bench(filename, schema_filename):
125    """Parse the input file
126
127    Parse and validate the json file containing the benchmark outputs.  Return
128    the resulting object.
129    Args:
130        filename: Name of the benchmark output file.
131    Return:
132        The bench dictionary.
133    """
134    with open(schema_filename, 'r') as schemafile:
135        schema = json.load(schemafile)
136        with open(filename, 'r') as benchfile:
137            bench = json.load(benchfile)
138            validator.validate(bench, schema)
139            return bench
140