diff --git a/pyperf/__main__.py b/pyperf/__main__.py
index 572253ae..3dd9a6e8 100644
--- a/pyperf/__main__.py
+++ b/pyperf/__main__.py
@@ -113,6 +113,9 @@ def cpu_affinity(cmd):
     cmd.add_argument("--table-format", type=str, default="rest",
                      choices=["rest", "md"],
                      help="Format of table rendering")
+    cmd.add_argument("--extra-metadata", type=str,
+                     help="Comma-separated metadata keys to include "
+                          "in comparison output")
     input_filenames(cmd)
 
     # stats
@@ -389,6 +392,9 @@ def cmd_compare_to(args):
     from pyperf._compare import compare_suites, CompareError
 
     data = load_benchmarks(args)
+    if getattr(args, "extra_metadata", None):
+        display_title("Benchmark")
+        print()
     if data.get_nsuite() < 2:
         print("ERROR: need at least two benchmark files")
         sys.exit(1)
diff --git a/pyperf/_compare.py b/pyperf/_compare.py
index 9b5669aa..2d81f39f 100644
--- a/pyperf/_compare.py
+++ b/pyperf/_compare.py
@@ -54,7 +54,7 @@ def get_tags_for_result(result):
 
 
 class CompareResult:
-    def __init__(self, ref, changed, min_speed=None):
+    def __init__(self, ref, changed, min_speed=None, extra_metadata=None):
         # CompareData object
         self.ref = ref
         # CompareData object
@@ -63,6 +63,7 @@ def __init__(self, ref, changed, min_speed=None):
         self._significant = None
         self._t_score = None
         self._norm_mean = None
+        self.extra_metadata = extra_metadata or []
 
     def __repr__(self):
         return '<CompareResult ref=%s changed=%s>' % (self.ref, self.changed)
@@ -110,21 +111,37 @@ def oneliner(self, verbose=True, show_name=True, check_significant=True):
         ref_text = format_result_value(self.ref.benchmark)
         chg_text = format_result_value(self.changed.benchmark)
         if verbose:
             if show_name:
                 ref_text = "[%s] %s" % (self.ref.name, ref_text)
                 chg_text = "[%s] %s" % (self.changed.name, chg_text)
             if (self.ref.benchmark.get_nvalue() > 1
                     or self.changed.benchmark.get_nvalue() > 1):
                 text = "Mean +- std dev: %s -> %s" % (ref_text, chg_text)
             else:
                 text = "%s -> %s" % (ref_text, chg_text)
         else:
             text = "%s -> %s" % (ref_text, chg_text)
 
+        # normalized mean
         text = "%s: %s" % (text, format_normalized_mean(self.norm_mean))
-        return text
+
+        # Append the requested metadata (key=value) of the reference and
+        # changed benchmarks when --extra-metadata is used.
+        if self.extra_metadata:
+            ref_meta = self.ref.benchmark.get_metadata()
+            chg_meta = self.changed.benchmark.get_metadata()
+            meta_parts = []
+            for key in self.extra_metadata:
+                if key in ref_meta:
+                    meta_parts.append(f"{key}={ref_meta[key]}")
+                if key in chg_meta:
+                    meta_parts.append(f"{key}={chg_meta[key]}")
+            if meta_parts:
+                text += " [" + ", ".join(meta_parts) + "]"
+        return text
 
     def format(self, verbose=True, show_name=True):
         text = self.oneliner(show_name=show_name, check_significant=False)
         lines = [text]
@@ -225,7 +242,13 @@ class CompareError(Exception):
 class CompareSuites:
     def __init__(self, benchmarks, args):
         self.benchmarks = benchmarks
 
+        # Parse the --extra-metadata option into a list of metadata keys.
+        self.extra_metadata = getattr(args, "extra_metadata", None)
+        if self.extra_metadata:
+            self.extra_metadata = [
+                key.strip() for key in self.extra_metadata.split(",")
+            ]
+        else:
+            self.extra_metadata = []
+
         self.table = args.table
         self.table_format = args.table_format
         self.min_speed = args.min_speed
@@ -262,7 +292,8 @@ def compare_benchmarks(self, name, benchmarks):
 
         for item in benchmarks[1:]:
             changed = CompareData(item.filename, item.benchmark)
-            result = CompareResult(ref, changed, min_speed)
+            result = CompareResult(ref, changed, min_speed,
+                                   extra_metadata=self.extra_metadata)
             results.append(result)
 
         return results
@@ -280,46 +311,82 @@ def sort_key(results):
 
             self.all_results.sort(key=sort_key)
 
+        # Build the table headers. Layout:
+        # [Benchmark] [Ref Name] [Ref Meta...] [Changed Name] [Changed Meta...]
         headers = ['Benchmark', self.all_results[0][0].ref.name]
+
+        # Reference metadata headers
+        for key in self.extra_metadata:
+            headers.append(key)
+
+        # Changed benchmark names and metadata headers
         for item in self.all_results[0]:
             headers.append(item.changed.name)
+            for key in self.extra_metadata:
+                headers.append(key)
 
-        all_norm_means = [[] for _ in range(len(headers[2:]))]
+        # Storage for the geometric mean calculation:
+        # one list of normalized means per changed benchmark.
+        num_changed_benchmarks = len(self.all_results[0])
+        all_norm_means = [[] for _ in range(num_changed_benchmarks)]
 
         rows = []
         not_significant = []
         for results in all_results:
             row = [results.name]
 
+            # Reference value
             ref_bench = results[0].ref.benchmark
             text = ref_bench.format_value(ref_bench.mean())
             row.append(text)
+
+            # Reference metadata values
+            for key in self.extra_metadata:
+                value = ref_bench.get_metadata().get(key, "-")
+                row.append(str(value))
 
             significants = []
             for index, result in enumerate(results):
                 bench = result.changed.benchmark
                 significant = result.significant
                 if significant:
                     text = format_normalized_mean(result.norm_mean)
                     if not self.quiet:
                         text = "%s: %s" % (bench.format_value(bench.mean()), text)
                 else:
                     text = "not significant"
                 significants.append(significant)
                 all_norm_means[index].append(result.norm_mean)
                 row.append(text)
 
+                # Changed benchmark metadata values
+                for key in self.extra_metadata:
+                    value = bench.get_metadata().get(key, "-")
+                    row.append(str(value))
+
             if any(significants):
                 rows.append(row)
             else:
                 not_significant.append(results.name)
 
+        # Geometric mean row.
         # only compute the geometric mean if there is at least two benchmarks
-        # and if at least one is signicant.
+        # and if at least one is significant (which means rows is not empty).
        if len(all_norm_means[0]) > 1 and rows:
             row = ['Geometric mean', '(ref)']
+            # Empty slots under the reference metadata columns.
+            for _ in self.extra_metadata:
+                row.append('')
             for norm_means in all_norm_means:
                 row.append(format_geometric_mean(norm_means))
+                # Empty slots under this benchmark's metadata columns,
+                # so the table stays aligned.
+                for _ in self.extra_metadata:
+                    row.append('')
             rows.append(row)
 
         if rows:
diff --git a/pyperf/tests/test_extra_metadata.py b/pyperf/tests/test_extra_metadata.py
new file mode 100644
index 00000000..014eb87b
--- /dev/null
+++ b/pyperf/tests/test_extra_metadata.py
@@ -0,0 +1,90 @@
+import json
+import os
+import subprocess
+import sys
+import uuid
+
+
+def create_temp_benchmark(tmpdir, data):
+    """
+    Create a valid pyperf JSON benchmark file.
+
+    pyperf requires the structure:
+    {
+        "version": "1.0",
+        "benchmarks": [
+            {
+                "metadata": {...},
+                "runs": [...]
+            }
+        ]
+    }
+    """
+    # pyperf requires a benchmark name and a unit
+    metadata = {
+        "name": "test_bench",
+        "unit": "second"
+    }
+    metadata.update(data.get("metadata", {}))
+
+    benchmark = {
+        "metadata": metadata,
+        "runs": data.get("runs", [])
+    }
+
+    suite = {
+        "version": "1.0",
+        "benchmarks": [benchmark]
+    }
+
+    path = os.path.join(str(tmpdir), f"bench_{uuid.uuid4().hex}.json")
+    with open(path, "w", encoding="utf-8") as f:
+        json.dump(suite, f)
+
+    return path
+
+
+def run_command(cmd):
+    proc = subprocess.Popen(
+        [sys.executable, "-m", "pyperf"] + cmd,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+    )
+    stdout, stderr = proc.communicate()
+    return stdout, stderr
+
+
+def test_compare_to_with_extra_metadata(tmpdir):
+    # 1. Create benchmark files with metadata
+    bench1 = create_temp_benchmark(tmpdir, {
+        "metadata": {"os": "linux", "cpu": "amd"},
+        "runs": [{"values": [1.0]}]
+    })
+
+    bench2 = create_temp_benchmark(tmpdir, {
+        "metadata": {"os": "linux", "cpu": "intel"},
+        "runs": [{"values": [1.0]}]
+    })
+
+    # 2. Run compare_to with --extra-metadata
+    cmd = [
+        "compare_to",
+        "--extra-metadata=os,cpu",
+        bench1,
+        bench2,
+    ]
+
+    stdout, stderr = run_command(cmd)
+
+    # 3. The selected metadata keys and values must appear in the output
+    assert stderr == ""
+    assert "os" in stdout
+    assert "cpu" in stdout
+    assert "linux" in stdout
+    assert "amd" in stdout
+    assert "intel" in stdout
+    assert "Benchmark" in stdout
diff --git a/pyperf/tests/test_perf_cli.py b/pyperf/tests/test_perf_cli.py
index fbb94037..85ba6254 100644
--- a/pyperf/tests/test_perf_cli.py
+++ b/pyperf/tests/test_perf_cli.py
@@ -742,6 +742,13 @@ def test_hook(self):
         assert metadata.get("_test_hook", 0) > 0
         assert metadata.get("hooks", None) == "_test_hook"
 
+    def test_compare_to_extra_metadata(self):
+        ref_result = self.create_bench(
+            (1.0,), metadata={'name': 'bench', 'os': 'linux', 'cpu': 'amd'})
+        changed_result = self.create_bench(
+            (1.0,), metadata={'name': 'bench', 'os': 'linux', 'cpu': 'intel'})
+        stdout = self.compare('compare_to', ref_result, changed_result,
+                              '--extra-metadata=os,cpu')
+        self.assertIn('os=linux', stdout)
+        self.assertIn('cpu=amd', stdout)
+        self.assertIn('cpu=intel', stdout)
+
 
 class TestConvert(BaseTestCase, unittest.TestCase):
     def test_stdout(self):