various performance work (wip)

This commit is contained in:
Jan Petykiewicz 2026-04-01 21:29:23 -07:00
commit 71e263c527
25 changed files with 4071 additions and 326 deletions

View file

@ -4,13 +4,17 @@ from __future__ import annotations
import argparse
import json
from dataclasses import asdict
from datetime import datetime
from pathlib import Path
from inire.tests.example_scenarios import SCENARIO_SNAPSHOTS
from inire.tests.example_scenarios import PERFORMANCE_SCENARIO_SNAPSHOTS, SCENARIO_SNAPSHOTS
from inire.results import RouteMetrics
SUMMARY_KEYS = (
"duration_s",
"valid_results",
"reached_targets",
"route_iterations",
"nets_routed",
"nodes_expanded",
@ -22,10 +26,30 @@ SUMMARY_KEYS = (
)
def _snapshot_registry(include_performance_only: bool) -> tuple[tuple[str, object], ...]:
    """Return the (name, factory) pairs of scenarios to snapshot.

    The default corpus is always included; the performance-only scenarios
    are appended after it when *include_performance_only* is set.
    """
    # NOTE(review): a stale pre-change `_current_snapshots` signature line
    # (diff residue) preceded this definition and has been removed.
    if not include_performance_only:
        return SCENARIO_SNAPSHOTS
    return SCENARIO_SNAPSHOTS + PERFORMANCE_SCENARIO_SNAPSHOTS
def _available_metric_names() -> tuple[str, ...]:
    """List every metric key accepted by --metric.

    The four summary fields live directly on a snapshot; the remainder are
    the dataclass fields of RouteMetrics, nested under "metrics".
    """
    summary_fields = (
        "duration_s",
        "total_results",
        "valid_results",
        "reached_targets",
    )
    return summary_fields + tuple(RouteMetrics.__dataclass_fields__)
def _current_snapshots(
    selected_scenarios: tuple[str, ...] | None,
    *,
    include_performance_only: bool,
) -> dict[str, dict[str, object]]:
    """Run the registered scenarios and return each result as a plain dict.

    Args:
        selected_scenarios: Scenario names to include, or None for all.
        include_performance_only: Also run the performance-only scenarios.

    Returns:
        Mapping of scenario name to the asdict()-converted run result.
    """
    # Membership test is O(1) on a set; None means "no filtering".
    allowed = None if selected_scenarios is None else set(selected_scenarios)
    snapshots: dict[str, dict[str, object]] = {}
    # NOTE(review): the pre-change loop header over SCENARIO_SNAPSHOTS was
    # left behind by the diff rendering and has been removed; only the
    # registry-based loop is current.
    for name, run in _snapshot_registry(include_performance_only):
        if allowed is not None and name not in allowed:
            continue
        snapshots[name] = asdict(run())
    return snapshots
@ -42,13 +66,29 @@ def _load_baseline(path: Path, selected_scenarios: tuple[str, ...] | None) -> di
}
def _metric_value(snapshot: dict[str, object], key: str) -> float:
if key == "duration_s":
return float(snapshot["duration_s"])
def _metric_value(snapshot: dict[str, object], key: str) -> float | None:
if key in {"duration_s", "total_results", "valid_results", "reached_targets"}:
return float(snapshot[key])
if key not in snapshot["metrics"]:
return None
return float(snapshot["metrics"][key])
def _render_report(baseline: dict[str, dict[str, object]], current: dict[str, dict[str, object]]) -> str:
def _validate_metrics(metric_names: tuple[str, ...]) -> None:
    """Abort with a readable error when any requested metric is unknown."""
    known = set(_available_metric_names())
    unknown = sorted(name for name in metric_names if name not in known)
    if not unknown:
        return
    raise SystemExit(
        f"Unknown metric name(s): {', '.join(unknown)}. "
        f"Valid names are: {', '.join(_available_metric_names())}"
    )
def _render_report(
baseline: dict[str, dict[str, object]],
current: dict[str, dict[str, object]],
metric_names: tuple[str, ...],
) -> str:
scenario_names = sorted(set(baseline) | set(current))
lines = [
"# Performance Baseline Diff",
@ -60,20 +100,61 @@ def _render_report(baseline: dict[str, dict[str, object]], current: dict[str, di
base_snapshot = baseline.get(scenario)
curr_snapshot = current.get(scenario)
if base_snapshot is None:
lines.append(f"| {scenario} | added | - | - | - |")
if curr_snapshot is None:
lines.append(f"| {scenario} | added | - | - | - |")
continue
for key in metric_names:
curr_value = _metric_value(curr_snapshot, key)
if curr_value is None:
lines.append(f"| {scenario} | {key} | - | - | - |")
continue
lines.append(f"| {scenario} | {key} | - | {curr_value:.4f} | - |")
continue
if curr_snapshot is None:
lines.append(f"| {scenario} | missing | - | - | - |")
continue
for key in SUMMARY_KEYS:
for key in metric_names:
base_value = _metric_value(base_snapshot, key)
curr_value = _metric_value(curr_snapshot, key)
if base_value is None:
lines.append(
f"| {scenario} | {key} | - | {curr_value:.4f} | - |"
)
continue
if curr_value is None:
lines.append(
f"| {scenario} | {key} | {base_value:.4f} | - | - |"
)
continue
lines.append(
f"| {scenario} | {key} | {base_value:.4f} | {curr_value:.4f} | {curr_value - base_value:+.4f} |"
)
return "\n".join(lines) + "\n"
def _render_log_entry(
*,
baseline_path: Path,
label: str,
notes: tuple[str, ...],
report: str,
) -> str:
lines = [
f"## {label}",
"",
f"Measured on {datetime.now().astimezone().isoformat(timespec='seconds')}.",
f"Baseline: `{baseline_path}`.",
"",
]
if notes:
lines.extend(["Findings:", ""])
lines.extend(f"- {note}" for note in notes)
lines.append("")
lines.append(report.rstrip())
lines.append("")
return "\n".join(lines)
def main() -> None:
parser = argparse.ArgumentParser(description="Diff the committed performance baseline against a fresh run.")
parser.add_argument(
@ -95,18 +176,61 @@ def main() -> None:
default=[],
help="Optional scenario name to include. May be passed more than once.",
)
parser.add_argument(
"--metric",
action="append",
dest="metrics",
default=[],
help="Optional metric to include. May be passed more than once. Defaults to the summary metric set.",
)
parser.add_argument(
"--label",
default="Measurement",
help="Section label to use when appending to a log file.",
)
parser.add_argument(
"--notes",
action="append",
dest="notes",
default=[],
help="Optional short finding to append under the measurement section. May be passed more than once.",
)
parser.add_argument(
"--log",
type=Path,
default=None,
help="Optional Markdown log file to append the rendered report to.",
)
parser.add_argument(
"--include-performance-only",
action="store_true",
help="Include performance-only snapshot scenarios that are excluded from the default baseline corpus.",
)
args = parser.parse_args()
selected = tuple(args.scenarios) if args.scenarios else None
metrics = tuple(args.metrics) if args.metrics else SUMMARY_KEYS
_validate_metrics(metrics)
baseline = _load_baseline(args.baseline, selected)
current = _current_snapshots(selected)
report = _render_report(baseline, current)
current = _current_snapshots(selected, include_performance_only=args.include_performance_only)
report = _render_report(baseline, current, metrics)
if args.output is None:
print(report, end="")
else:
if args.output is not None:
args.output.write_text(report)
print(f"Wrote {args.output}")
elif args.log is None:
print(report, end="")
if args.log is not None:
entry = _render_log_entry(
baseline_path=args.baseline,
label=args.label,
notes=tuple(args.notes),
report=report,
)
with args.log.open("a", encoding="utf-8") as handle:
handle.write(entry)
print(f"Appended {args.log}")
if __name__ == "__main__":