#!/usr/bin/env python3
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import json
from collections import defaultdict
from typing import Tuple, List, Dict, Union, Callable, Any, Sequence, Set

import yaml

# Extracts the data for a single table from the given benchmark results.
# Returns (table_data, used_bench_results, used_bench_result_values), where
# table_data[row_value][column_value] is the result for that cell.
def extract_results(bench_results: List[Dict[str, Dict[Any, Any]]],
                    fixed_benchmark_params: Dict[str, Union[str, Tuple[str, ...]]],
                    column_dimension: str,
                    row_dimension: str,
                    result_dimension: str) -> Tuple[Dict[str, Dict[str, Dict[str, Any]]],
                                                    Set[Tuple[List[Tuple[str, str]], ...]],
                                                    Set[Tuple[Tuple[List[Tuple[str, str]], ...],
                                                              str]]]:
    table_data = defaultdict(lambda: dict())  # type: Dict[str, Dict[str, Dict[str, Any]]]
    remaining_dimensions_by_row_column = dict()
    used_bench_results = set()  # type: Set[Tuple[List[Tuple[str, str]], ...]]
    used_bench_result_values = set()  # type: Set[Tuple[Tuple[List[Tuple[str, str]], ...], str]]
    for bench_result in bench_results:
        try:
            params = {dimension_name: make_immutable(dimension_value)
                      for dimension_name, dimension_value in bench_result['benchmark'].items()}
            original_params = dict(params)
            results = bench_result['results']
            matches = True
            if result_dimension not in results:
                # result_dimension not found in this result, skip
                matches = False
            for param_name, param_value in fixed_benchmark_params.items():
                if (isinstance(param_value, tuple) and params.get(param_name) in param_value) or (params.get(param_name) == param_value):
                    # This param matches one of the fixed values; remove it so that it
                    # doesn't show up in remaining_dimensions below.
                    params.pop(param_name)
                else:
                    # fixed_benchmark_params not satisfied by this result, skip
                    matches = False
            if matches:
                # fixed_benchmark_params were satisfied by these params (and were removed)
                assert row_dimension in params.keys(), '%s not in %s' % (row_dimension, params.keys())
                assert column_dimension in params.keys(), '%s not in %s' % (column_dimension, params.keys())
                assert result_dimension in results, '%s not in %s' % (result_dimension, results)
                used_bench_results.add(tuple(sorted(original_params.items())))
                used_bench_result_values.add((tuple(sorted(original_params.items())),
                                              result_dimension))
                row_value = params[row_dimension]
                column_value = params[column_dimension]
                remaining_dimensions = params.copy()
                remaining_dimensions.pop(row_dimension)
                remaining_dimensions.pop(column_dimension)
                if column_value in table_data[row_value]:
                    previous_remaining_dimensions = remaining_dimensions_by_row_column[(row_value, column_value)]
                    raise Exception(
                        'Found multiple benchmark results with the same fixed benchmark params, benchmark param for row and benchmark param for column, so a result can\'t be uniquely determined. '
                        + 'Consider adding additional values in fixed_benchmark_params. Remaining dimensions:\n%s\nvs\n%s' % (
                            remaining_dimensions, previous_remaining_dimensions))
                table_data[row_value][column_value] = results[result_dimension]
                remaining_dimensions_by_row_column[(row_value, column_value)] = remaining_dimensions
        except Exception as e:
            raise Exception('While processing %s' % bench_result) from e
    return table_data, used_bench_results, used_bench_result_values
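
# A sketch of the input shape expected by extract_results (hypothetical
# dimension names, with fixed_benchmark_params={}):
#
#   bench_results = [{'benchmark': {'name': 'compile_time', 'compiler': 'gcc'},
#                     'results': {'compile_time': ((1.0, 1.2), (1.0, 1.2))}}]
#
# With row_dimension='name', column_dimension='compiler' and
# result_dimension='compile_time', this yields
# table_data['compile_time']['gcc'] == ((1.0, 1.2), (1.0, 1.2)).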

# Takes a 2-dimensional array (list of lists) and prints a markdown table with that content.
def print_markdown_table(table_data: List[List[str]]) -> None:
    max_content_length_by_column = [max([len(str(row[column_index])) for row in table_data])
                                    for column_index in range(len(table_data[0]))]
    for row_index in range(len(table_data)):
        row = table_data[row_index]
        cell_strings = []
        for column_index in range(len(row)):
            value = str(row[column_index])
            # E.g. if the column's max content length is 20, table_cell_format is '%20s'.
            table_cell_format = '%%%ss' % max_content_length_by_column[column_index]
            cell_strings += [table_cell_format % value]
        print('| ' + ' | '.join(cell_strings) + ' |')
        if row_index == 0:
            # Print the separator line, e.g. |---|-----|---|
            print('|-'
                  + '-|-'.join(['-' * max_content_length_by_column[column_index]
                                for column_index in range(len(row))])
                  + '-|')
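
# Example use of print_markdown_table (hypothetical data):
#
#   print_markdown_table([['compiler', 'runtime'],
#                         ['gcc', '1.2s'],
#                         ['clang', '1.1s']])
#
# prints the following, with each cell right-aligned to the widest cell in its column:
#
#   | compiler | runtime |
#   |----------|---------|
#   |      gcc |    1.2s |
#   |    clang |    1.1s |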

# A sequence of length 2, with the lower and upper bound of the interval.
# TODO: use a class instead.
Interval = Sequence[float]

def compute_min_max(table_data, row_headers: List[str], column_headers: List[str]) -> Interval:
    # Each cell in table_data is a (raw_confidence_interval, rounded_confidence_interval)
    # pair, so we take the min/max across both intervals.
    values_by_row = {row_header: [table_data[row_header][column_header]
                                  for column_header in column_headers
                                  if column_header in table_data[row_header]]
                     for row_header in row_headers}
    # We compute min and max and pass them to the value pretty-printer, so that it can
    # determine a unit that works well for all values in the table.
    min_in_table = min([min([min(interval[0][0], interval[1][0]) for interval in values_by_row[row_header]])
                        for row_header in row_headers])
    max_in_table = max([max([max(interval[0][1], interval[1][1]) for interval in values_by_row[row_header]])
                        for row_header in row_headers])
    return (min_in_table, max_in_table)
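
# E.g. with table_data = {'r': {'c': ((1.0, 2.5), (1.0, 2.4))}} (each cell a
# (raw, rounded) pair of intervals), compute_min_max(table_data, ['r'], ['c'])
# returns (1.0, 2.5): the minimum of the lower bounds and the maximum of the
# upper bounds across both intervals.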

def pretty_print_percentage_difference(baseline_value: Interval, current_value: Interval) -> str:
    baseline_min = baseline_value[0]
    baseline_max = baseline_value[1]
    current_min = current_value[0]
    current_max = current_value[1]
    percentage_min = (current_min / baseline_max - 1) * 100
    percentage_max = (current_max / baseline_min - 1) * 100
    percentage_min_s = "%+.1f%%" % percentage_min
    percentage_max_s = "%+.1f%%" % percentage_max
    if percentage_min_s == percentage_max_s:
        return percentage_min_s
    else:
        return "%s - %s" % (percentage_min_s, percentage_max_s)
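
# E.g. (hypothetical values): with baseline_value=(2.0, 2.0) and
# current_value=(2.2, 2.4) this returns '+10.0% - +20.0%'; with
# current_value=(2.2, 2.2) both bounds format to the same string and it
# returns just '+10.0%'.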

DimensionPrettyPrinter = Callable[[Any], str]

IntervalPrettyPrinter = Callable[[Interval, float, float], str]

# Takes a table as a dict of dicts (where each table_data[row_key][column_key] is a confidence interval) and prints it as a markdown table using
# the specified pretty print functions for column keys, row keys and values respectively.
# column_header_pretty_printer and row_header_pretty_printer must be functions taking a single value and returning the pretty-printed version.
# value_pretty_printer must be a function taking (value_confidence_interval, min_in_table, max_in_table).
# baseline_table_data is an optional table (similar to table_data) that contains the "before" state. If present, the values in the two tables will be compared.
def print_confidence_intervals_table(table_name,
                                     table_data,
                                     baseline_table_data,
                                     column_header_pretty_printer: DimensionPrettyPrinter,
                                     row_header_pretty_printer: DimensionPrettyPrinter,
                                     value_pretty_printer: IntervalPrettyPrinter,
                                     row_sort_key: Callable[[Any], Any]):
    if table_data == {}:
        print('%s: (no data)' % table_name)
        return

    row_headers = sorted(list(table_data.keys()), key=row_sort_key)
    # We need to compute the union of the headers of all rows; some rows might be missing values for certain columns.
    column_headers = sorted(set().union(*[list(row_values.keys()) for row_values in table_data.values()]))
    if baseline_table_data:
        baseline_row_headers = sorted(list(baseline_table_data.keys()), key=row_sort_key)
        baseline_column_headers = sorted(set().union(*[list(row_values.keys()) for row_values in baseline_table_data.values()]))
        unmatched_baseline_column_headers = set(baseline_column_headers) - set(column_headers)
        if unmatched_baseline_column_headers:
            print('Found baseline column headers with no match in new results (they will be ignored): ', unmatched_baseline_column_headers)
        unmatched_baseline_row_headers = set(baseline_row_headers) - set(row_headers)
        if unmatched_baseline_row_headers:
            print('Found baseline row headers with no match in new results (they will be ignored): ', unmatched_baseline_row_headers)

    min_in_table, max_in_table = compute_min_max(table_data, row_headers, column_headers)
    if baseline_table_data:
        min_in_baseline_table, max_in_baseline_table = compute_min_max(baseline_table_data, baseline_row_headers, baseline_column_headers)
        min_in_table = min(min_in_table, min_in_baseline_table)
        max_in_table = max(max_in_table, max_in_baseline_table)

    table_content = []
    table_content.append([table_name] + [column_header_pretty_printer(column_header) for column_header in column_headers])
    for row_header in row_headers:
        row_content = [row_header_pretty_printer(row_header)]
        for column_header in column_headers:
            if column_header in table_data[row_header]:
                value = table_data[row_header][column_header]
                raw_confidence_interval, rounded_confidence_interval = value
                pretty_printed_value = value_pretty_printer(rounded_confidence_interval, min_in_table, max_in_table)
                if baseline_table_data and row_header in baseline_table_data and column_header in baseline_table_data[row_header]:
                    baseline_value = baseline_table_data[row_header][column_header]
                    raw_baseline_confidence_interval, rounded_baseline_confidence_interval = baseline_value
                    pretty_printed_baseline_value = value_pretty_printer(rounded_baseline_confidence_interval, min_in_table, max_in_table)
                    pretty_printed_percentage_difference = pretty_print_percentage_difference(raw_baseline_confidence_interval, raw_confidence_interval)
                    row_content.append("%s -> %s (%s)" % (pretty_printed_baseline_value, pretty_printed_value, pretty_printed_percentage_difference))
                else:
                    row_content.append(pretty_printed_value)
            else:
                row_content.append("N/A")
        table_content.append(row_content)
    print_markdown_table(table_content)
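
# A minimal usage sketch (hypothetical data; each cell is a
# (raw_confidence_interval, rounded_confidence_interval) pair):
#
#   print_confidence_intervals_table(
#       'compile time',
#       {'fruit': {'gcc': ((1.10, 1.24), (1.1, 1.2))}},
#       None,  # no baseline
#       column_header_pretty_printer=str,
#       row_header_pretty_printer=str,
#       value_pretty_printer=time_interval_pretty_printer,
#       row_sort_key=lambda x: x)
#
# This prints a markdown table with one data row ('fruit') and one column
# ('gcc') whose cell reads '1.1-1.2 s'.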

def format_string_pretty_printer(format_string: str) -> Callable[[str], str]:
    def pretty_print(s: str):
        return format_string % s

    return pretty_print
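
# E.g. format_string_pretty_printer('%s bindings')('80') returns '80 bindings'
# (the format string here is hypothetical).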

def float_to_str(x: float) -> str:
    if x > 100:
        return str(int(x))
    else:
        return '%.2g' % x
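
# Examples: float_to_str(123.4) == '123', float_to_str(42.0) == '42',
# float_to_str(0.0123) == '0.012'.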

def interval_pretty_printer(interval: Interval, unit: str, multiplier: float) -> str:
    interval = list(interval)  # type: List[Any]
    interval[0] *= multiplier
    interval[1] *= multiplier

    # This prevents the format strings below from printing '.0' for numbers that already have 2 digits:
    # 23.0 -> 23
    # 2.0 -> 2.0 (here we don't remove the '.0' because printing just '2' might suggest a lower precision)
    if int(interval[0]) == interval[0] and interval[0] >= 10:
        interval[0] = int(interval[0])
    else:
        interval[0] = float_to_str(interval[0])
    if int(interval[1]) == interval[1] and interval[1] >= 10:
        interval[1] = int(interval[1])
    else:
        interval[1] = float_to_str(interval[1])

    if interval[0] == interval[1]:
        return '%s %s' % (interval[0], unit)
    else:
        return '%s-%s %s' % (interval[0], interval[1], unit)
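
# E.g. interval_pretty_printer((0.0012, 0.0015), unit='ms', multiplier=1000)
# returns '1.2-1.5 ms', while interval_pretty_printer((0.0012, 0.0012), 'ms', 1000)
# returns just '1.2 ms'.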

# Finds the best unit to represent values in the range [min_value, max_value].
# The units must be specified as an ordered list [multiplier1, ..., multiplierN], in increasing order.
def find_best_unit(units: List[float], min_value: float, max_value: float) -> float:
    assert min_value <= max_value
    if max_value <= units[0]:
        return units[0]
    for i in range(len(units) - 1):
        if min_value > units[i] and max_value < units[i + 1]:
            return units[i]
    if min_value > units[-1]:
        return units[-1]
    # There is no unit that works very well for all values, so first let's try relaxing the min constraint.
    for i in range(len(units) - 1):
        if min_value > units[i] * 0.2 and max_value < units[i + 1]:
            return units[i]
    if min_value > units[-1] * 0.2:
        return units[-1]
    # That didn't work either, so just use a unit that works well for the min values.
    for i in reversed(range(len(units))):
        if min_value > units[i]:
            return units[i]
    assert min_value <= min(units)
    # Pick the smallest unit.
    return units[0]
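
# E.g. with units=[0.000001, 0.001, 1] (μs, ms, s):
#   find_best_unit(units, 0.002, 0.5) returns 0.001 (ms works for both bounds);
#   find_best_unit(units, 0.0000005, 0.0000009) returns 0.000001 (everything is
#   at or below the smallest unit, so the smallest unit is used).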

def time_interval_pretty_printer(time_interval: Interval, min_in_table: float, max_in_table: float) -> str:
    sec = 1
    milli = 0.001
    micro = milli * milli  # i.e. 1e-6
    units = [micro, milli, sec]
    unit_name_by_unit = {micro: 'μs', milli: 'ms', sec: 's'}

    unit = find_best_unit(units, min_in_table, max_in_table)
    unit_name = unit_name_by_unit[unit]

    return interval_pretty_printer(time_interval, unit=unit_name, multiplier=1 / unit)
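
# E.g. time_interval_pretty_printer((0.0021, 0.0028), min_in_table=0.0021,
# max_in_table=0.0028) returns '2.1-2.8 ms'.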

def file_size_interval_pretty_printer(file_size_interval: Interval, min_in_table: float, max_in_table: float) -> str:
    byte = 1
    kb = 1024
    mb = kb * kb
    units = [byte, kb, mb]
    unit_name_by_unit = {byte: 'bytes', kb: 'KB', mb: 'MB'}

    unit = find_best_unit(units, min_in_table, max_in_table)
    unit_name = unit_name_by_unit[unit]

    return interval_pretty_printer(file_size_interval, unit=unit_name, multiplier=1 / unit)
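
# E.g. file_size_interval_pretty_printer((2048, 3072), min_in_table=2048,
# max_in_table=3072) returns '2-3 KB'.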

def make_immutable(x):
    # Recursively converts lists to tuples, so that the value can be used as a dict key or set element.
    if isinstance(x, list):
        return tuple(make_immutable(elem) for elem in x)
    return x
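
# E.g. make_immutable(['a', ['b', 'c']]) returns ('a', ('b', 'c')); non-list
# values are returned unchanged.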

def dict_pretty_printer(dict_data: Union[Dict[str, str], List[Dict[str, Union[str, Tuple[str]]]]]) -> Callable[[Union[str, Tuple[str]]], str]:
    if isinstance(dict_data, list):
        dict_data = {make_immutable(mapping['from']): mapping['to'] for mapping in dict_data}

    def pretty_print(s: Union[str, Tuple[str]]) -> str:
        if s in dict_data:
            return dict_data[s]
        else:
            raise Exception('dict_pretty_printer(%s) can\'t handle the value %s' % (dict_data, s))

    return pretty_print
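
# E.g. (hypothetical mapping):
#   p = dict_pretty_printer([{'from': 'old_style', 'to': 'Old style'},
#                            {'from': 'new_style', 'to': 'New style'}])
#   p('old_style') returns 'Old style'; any value not in the map raises an exception.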

def determine_column_pretty_printer(pretty_printer_definition: Dict[str, Any]) -> DimensionPrettyPrinter:
    if 'format_string' in pretty_printer_definition:
        return format_string_pretty_printer(pretty_printer_definition['format_string'])

    if 'fixed_map' in pretty_printer_definition:
        return dict_pretty_printer(pretty_printer_definition['fixed_map'])

    raise Exception("Unrecognized pretty printer description: %s" % pretty_printer_definition)

def determine_row_pretty_printer(pretty_printer_definition: Dict[str, Any]) -> DimensionPrettyPrinter:
    return determine_column_pretty_printer(pretty_printer_definition)

def determine_row_sort_key(pretty_printer_definition: Dict[str, Any]) -> Callable[[Any], Any]:
    if 'fixed_map' in pretty_printer_definition:
        # Sort rows in the order in which they appear in the fixed_map.
        indexes = {x: i for i, x in enumerate(pretty_printer_definition['fixed_map'].keys())}
        return lambda s: indexes[s]

    return lambda x: x
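
# E.g. with pretty_printer_definition={'fixed_map': {'b': 'B', 'a': 'A'}} the
# returned key sorts 'b' before 'a', i.e. rows keep the fixed_map's order.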

def determine_value_pretty_printer(unit: str) -> IntervalPrettyPrinter:
    if unit == "seconds":
        return time_interval_pretty_printer
    if unit == "bytes":
        return file_size_interval_pretty_printer
    raise Exception("Unrecognized unit: %s" % unit)
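
# A sketch of the YAML structure that main() below expects (field names are
# taken from the code; an actual fruit_wiki_bench_tables.yaml may use other
# dimension names and values):
#
#   tables:
#     - name: "Compile time"
#       benchmark_filter:
#         some_dimension: "some_value"
#       columns:
#         dimension: "compiler"
#         pretty_printer:
#           format_string: "%s"
#       rows:
#         dimension: "name"
#         pretty_printer:
#           fixed_map:
#             compile_time: "Compile time"
#       results:
#         dimension: "compile_time"
#         unit: "seconds"   # or "bytes"
#   allowed_unused_benchmarks: []
#   allowed_unused_benchmark_results: []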

def main():
    parser = argparse.ArgumentParser(description='Formats benchmark results into the tables shown on the Fruit website.')
    parser.add_argument('--benchmark-results',
                        help='The input file where benchmark results will be read from (one per line, with each line in JSON format). You can use run_benchmarks.py to run a benchmark and generate results in this format.')
    parser.add_argument('--baseline-benchmark-results',
                        help='Optional. If specified, compares this file (considered the "before" state) with the one specified in --benchmark-results.')
    parser.add_argument('--benchmark-tables-definition', help='The YAML file that defines the benchmark tables (e.g. fruit_wiki_bench_tables.yaml).')
    args = parser.parse_args()

    if args.benchmark_results is None:
        raise Exception("You must specify a benchmark results file using --benchmark-results.")

    if args.benchmark_tables_definition is None:
        raise Exception("You must specify a benchmark tables definition file using --benchmark-tables-definition.")

    with open(args.benchmark_results, 'r') as f:
        bench_results = [json.loads(line) for line in f.readlines()]

    if args.baseline_benchmark_results:
        with open(args.baseline_benchmark_results, 'r') as f:
            baseline_bench_results = [json.loads(line) for line in f.readlines()]
    else:
        baseline_bench_results = None

    with open(args.benchmark_tables_definition, 'r') as f:
        used_bench_results = set()
        # Set of (benchmark definition, benchmark result name) pairs
        used_bench_result_values = set()
        config = yaml.full_load(f)
        for table_definition in config["tables"]:
            try:
                fixed_benchmark_params = {dimension_name: make_immutable(dimension_value) for dimension_name, dimension_value in table_definition['benchmark_filter'].items()}
                table_data, last_used_bench_results, last_used_bench_result_values = extract_results(
                    bench_results,
                    fixed_benchmark_params=fixed_benchmark_params,
                    column_dimension=table_definition['columns']['dimension'],
                    row_dimension=table_definition['rows']['dimension'],
                    result_dimension=table_definition['results']['dimension'])
                used_bench_results = used_bench_results.union(last_used_bench_results)
                used_bench_result_values = used_bench_result_values.union(last_used_bench_result_values)
                if baseline_bench_results:
                    baseline_table_data, _, _ = extract_results(
                        baseline_bench_results,
                        fixed_benchmark_params=fixed_benchmark_params,
                        column_dimension=table_definition['columns']['dimension'],
                        row_dimension=table_definition['rows']['dimension'],
                        result_dimension=table_definition['results']['dimension'])
                else:
                    baseline_table_data = None
                rows_pretty_printer_definition = table_definition['rows']['pretty_printer']
                columns_pretty_printer_definition = table_definition['columns']['pretty_printer']
                results_unit = table_definition['results']['unit']
                print_confidence_intervals_table(table_definition['name'],
                                                 table_data,
                                                 baseline_table_data,
                                                 column_header_pretty_printer=determine_column_pretty_printer(columns_pretty_printer_definition),
                                                 row_header_pretty_printer=determine_row_pretty_printer(rows_pretty_printer_definition),
                                                 value_pretty_printer=determine_value_pretty_printer(results_unit),
                                                 row_sort_key=determine_row_sort_key(rows_pretty_printer_definition))
                print()
                print()
            except Exception as e:
                print('While processing table:\n%s' % table_definition)
                print()
                raise e
        allowed_unused_benchmarks = set(config.get('allowed_unused_benchmarks', []))
        allowed_unused_benchmark_results = set(config.get('allowed_unused_benchmark_results', []))
        for bench_result in bench_results:
            params = {dimension_name: make_immutable(dimension_value)
                      for dimension_name, dimension_value in bench_result['benchmark'].items()}
            benchmark_defn = tuple(sorted(params.items()))
            if benchmark_defn not in used_bench_results:
                if params['name'] not in allowed_unused_benchmarks:
                    print('Warning: benchmark result did not match any tables: %s' % params)
            else:
                unused_result_dimensions = {result_dimension
                                            for result_dimension in bench_result['results'].keys()
                                            if (benchmark_defn, result_dimension) not in used_bench_result_values and result_dimension not in allowed_unused_benchmark_results}
                if unused_result_dimensions:
                    print('Warning: unused result dimensions %s in benchmark result %s' % (unused_result_dimensions, params))
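
# Typical invocation (a sketch; file names are hypothetical):
#
#   ./format_bench_results.py --benchmark-results=bench_results.txt \
#       --benchmark-tables-definition=fruit_wiki_bench_tables.yaml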

if __name__ == "__main__":
    main()