inire/scripts/diff_performance_baseline.py

237 lines
7.4 KiB
Python
Raw Normal View History

2026-03-31 17:41:15 -07:00
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import json
from dataclasses import asdict
2026-04-01 21:29:23 -07:00
from datetime import datetime
2026-03-31 17:41:15 -07:00
from pathlib import Path
2026-04-01 21:29:23 -07:00
from inire.tests.example_scenarios import PERFORMANCE_SCENARIO_SNAPSHOTS, SCENARIO_SNAPSHOTS
from inire.results import RouteMetrics
2026-03-31 17:41:15 -07:00
# Metrics reported when no explicit metric selection is supplied.  Rows are
# rendered in the order listed here.
SUMMARY_KEYS = (
    # Top-level snapshot fields.
    "duration_s",
    "valid_results",
    "reached_targets",
    # Counters looked up under the snapshot's "metrics" mapping.
    "route_iterations",
    "nets_routed",
    "nodes_expanded",
    "ray_cast_calls",
    "moves_generated",
    "moves_added",
    "congestion_check_calls",
    "verify_path_report_calls",
)
2026-04-01 21:29:23 -07:00
def _snapshot_registry(include_performance_only: bool) -> tuple[tuple[str, object], ...]:
    """Return the (name, runner) pairs that make up the snapshot corpus.

    The default corpus is ``SCENARIO_SNAPSHOTS``.  When
    *include_performance_only* is true, ``PERFORMANCE_SCENARIO_SNAPSHOTS`` is
    appended after it.
    """
    registry = SCENARIO_SNAPSHOTS
    if include_performance_only:
        # Build a new sequence rather than extending the shared registry.
        registry = registry + PERFORMANCE_SCENARIO_SNAPSHOTS
    return registry
def _available_metric_names() -> tuple[str, ...]:
    """Return every metric name this script knows how to report.

    The result is the four top-level snapshot fields followed by the field
    names declared on the ``RouteMetrics`` dataclass, in declaration order.
    """
    top_level_fields = ("duration_s", "total_results", "valid_results", "reached_targets")
    counter_fields = tuple(RouteMetrics.__dataclass_fields__)
    return top_level_fields + counter_fields
def _current_snapshots(
    selected_scenarios: tuple[str, ...] | None,
    *,
    include_performance_only: bool,
) -> dict[str, dict[str, object]]:
    """Run the registered scenarios and return their snapshots keyed by name.

    Args:
        selected_scenarios: Scenario names to run, or ``None`` to run all
            registered scenarios.
        include_performance_only: Forwarded to ``_snapshot_registry`` to
            choose which registry entries are considered.

    Returns:
        A mapping from scenario name to ``asdict()`` of the object returned
        by that scenario's runner.
    """
    wanted = set(selected_scenarios) if selected_scenarios is not None else None
    return {
        scenario_name: asdict(runner())
        for scenario_name, runner in _snapshot_registry(include_performance_only)
        # The filter is evaluated before the runner is invoked, so scenarios
        # that were not requested are never executed.
        if wanted is None or scenario_name in wanted
    }
def _load_baseline(path: Path, selected_scenarios: tuple[str, ...] | None) -> dict[str, dict[str, object]]:
    """Load the baseline JSON file and index its scenario entries by name.

    Args:
        path: Location of the baseline JSON document.  It must contain a
            top-level ``"scenarios"`` list whose entries each have a
            ``"name"`` key.
        selected_scenarios: Scenario names to keep, or ``None`` to keep every
            entry in the file.

    Returns:
        A mapping from scenario name to that scenario's raw JSON entry.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so the file reads the same on every machine.
    payload = json.loads(path.read_text(encoding="utf-8"))
    allowed = None if selected_scenarios is None else set(selected_scenarios)
    return {
        entry["name"]: entry
        for entry in payload["scenarios"]
        if allowed is None or entry["name"] in allowed
    }
2026-04-01 21:29:23 -07:00
def _metric_value(snapshot: dict[str, object], key: str) -> float | None:
    """Return metric *key* from *snapshot* as a float, or ``None`` if absent.

    The four top-level fields (``duration_s``, ``total_results``,
    ``valid_results``, ``reached_targets``) are read directly from the
    snapshot; every other key is looked up in the nested ``"metrics"``
    mapping.

    A value counts as absent when its key is missing, when the ``"metrics"``
    mapping itself is missing, or when the stored value is ``None``.  All of
    these yield ``None`` so that a snapshot lacking some fields is reported as
    "no value" instead of aborting the whole diff.

    Args:
        snapshot: One scenario snapshot (baseline entry or fresh run).
        key: Name of the metric to fetch.

    Returns:
        The metric converted with ``float()``, or ``None`` when absent.
    """
    if key in {"duration_s", "total_results", "valid_results", "reached_targets"}:
        raw = snapshot.get(key)
    else:
        metrics = snapshot.get("metrics")
        raw = metrics.get(key) if isinstance(metrics, dict) else None
    return None if raw is None else float(raw)
2026-04-01 21:29:23 -07:00
def _validate_metrics(metric_names: tuple[str, ...]) -> None:
    """Abort the program if any requested metric name is not recognised.

    Args:
        metric_names: Metric names to check against
            ``_available_metric_names()``.

    Raises:
        SystemExit: With a message listing the unknown names (sorted) and the
            full set of valid names, when at least one name is unknown.
    """
    known = set(_available_metric_names())
    rejected = sorted(name for name in metric_names if name not in known)
    if not rejected:
        return
    raise SystemExit(
        f"Unknown metric name(s): {', '.join(rejected)}. "
        f"Valid names are: {', '.join(_available_metric_names())}"
    )
def _render_report(
    baseline: dict[str, dict[str, object]],
    current: dict[str, dict[str, object]],
    metric_names: tuple[str, ...],
) -> str:
    """Render a Markdown table comparing *baseline* and *current* snapshots.

    Scenarios are listed in sorted order over the union of both mappings:

    * A scenario present only in *baseline* produces a single ``missing`` row.
    * A scenario present only in *current* produces one row per metric with
      ``-`` in the Baseline and Delta columns.
    * A scenario present in both produces one row per metric.  The Delta
      column is ``current - baseline`` when both values exist, else ``-``.

    Any individual value that is unavailable is rendered as ``-``.

    Args:
        baseline: Baseline snapshots keyed by scenario name.
        current: Freshly measured snapshots keyed by scenario name.
        metric_names: Metrics to report, in row order.

    Returns:
        The Markdown report, terminated by a newline.
    """

    def cell(value: float | None) -> str:
        # ``None`` cannot be formatted with ``:.4f``; render it as a dash.
        return "-" if value is None else f"{value:.4f}"

    lines = [
        "# Performance Baseline Diff",
        "",
        "| Scenario | Metric | Baseline | Current | Delta |",
        "| :-- | :-- | --: | --: | --: |",
    ]
    for scenario in sorted(set(baseline) | set(current)):
        base_snapshot = baseline.get(scenario)
        curr_snapshot = current.get(scenario)
        if curr_snapshot is None:
            lines.append(f"| {scenario} | missing | - | - | - |")
            continue
        for key in metric_names:
            # A scenario absent from the baseline has no baseline values.
            base_value = None if base_snapshot is None else _metric_value(base_snapshot, key)
            curr_value = _metric_value(curr_snapshot, key)
            if base_value is None or curr_value is None:
                delta = "-"
            else:
                delta = f"{curr_value - base_value:+.4f}"
            lines.append(f"| {scenario} | {key} | {cell(base_value)} | {cell(curr_value)} | {delta} |")
    return "\n".join(lines) + "\n"
2026-04-01 21:29:23 -07:00
def _render_log_entry(
    *,
    baseline_path: Path,
    label: str,
    notes: tuple[str, ...],
    report: str,
) -> str:
    """Build the Markdown section that records one measurement in a log.

    The section consists of a ``## <label>`` heading, the local timestamp of
    the call, the baseline path, an optional bulleted ``Findings:`` list (only
    when *notes* is non-empty), and finally *report* with trailing whitespace
    stripped.  The returned text ends with a single newline.

    Args:
        baseline_path: Path of the baseline the report was diffed against.
        label: Heading text for the section.
        notes: Short findings, one bullet each.
        report: The rendered diff report.

    Returns:
        The complete Markdown section.
    """
    measured_at = datetime.now().astimezone().isoformat(timespec="seconds")
    header = [
        f"## {label}",
        "",
        f"Measured on {measured_at}.",
        f"Baseline: `{baseline_path}`.",
        "",
    ]
    findings = ["Findings:", "", *(f"- {note}" for note in notes), ""] if notes else []
    # The trailing empty string makes the joined text end with a newline.
    body = [report.rstrip(), ""]
    return "\n".join(header + findings + body)
2026-03-31 17:41:15 -07:00
def main(argv: list[str] | None = None) -> None:
    """Diff the committed performance baseline against a fresh run.

    Parses the command line, validates the requested metric names, loads the
    baseline, runs the selected scenarios, and renders a Markdown report.

    Output routing:

    * ``--output`` given: the report is written to that file.
    * neither ``--output`` nor ``--log`` given: the report is printed to
      stdout.
    * ``--log`` given: a labelled section containing the report is appended
      to the log file (in addition to ``--output``, if also given).

    Args:
        argv: Argument list to parse.  ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Raises:
        SystemExit: On invalid arguments or unknown metric names.
    """
    parser = argparse.ArgumentParser(description="Diff the committed performance baseline against a fresh run.")
    parser.add_argument(
        "--baseline",
        type=Path,
        default=Path("docs/performance_baseline.json"),
        help="Baseline JSON to compare against.",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=None,
        help="Optional file to write the report to. Defaults to stdout.",
    )
    parser.add_argument(
        "--scenario",
        action="append",
        dest="scenarios",
        default=[],
        help="Optional scenario name to include. May be passed more than once.",
    )
    parser.add_argument(
        "--metric",
        action="append",
        dest="metrics",
        default=[],
        help="Optional metric to include. May be passed more than once. Defaults to the summary metric set.",
    )
    parser.add_argument(
        "--label",
        default="Measurement",
        help="Section label to use when appending to a log file.",
    )
    parser.add_argument(
        "--notes",
        action="append",
        dest="notes",
        default=[],
        help="Optional short finding to append under the measurement section. May be passed more than once.",
    )
    parser.add_argument(
        "--log",
        type=Path,
        default=None,
        help="Optional Markdown log file to append the rendered report to.",
    )
    parser.add_argument(
        "--include-performance-only",
        action="store_true",
        help="Include performance-only snapshot scenarios that are excluded from the default baseline corpus.",
    )
    args = parser.parse_args(argv)

    # An empty selection means "no filter", not "nothing".
    selected = tuple(args.scenarios) if args.scenarios else None
    metrics = tuple(args.metrics) if args.metrics else SUMMARY_KEYS
    # Validate before running the scenarios so a typo fails immediately.
    _validate_metrics(metrics)

    baseline = _load_baseline(args.baseline, selected)
    current = _current_snapshots(selected, include_performance_only=args.include_performance_only)
    report = _render_report(baseline, current, metrics)

    if args.output is not None:
        # Same explicit encoding as the log file written below.
        args.output.write_text(report, encoding="utf-8")
        print(f"Wrote {args.output}")
    elif args.log is None:
        print(report, end="")

    if args.log is not None:
        entry = _render_log_entry(
            baseline_path=args.baseline,
            label=args.label,
            notes=tuple(args.notes),
            report=report,
        )
        with args.log.open("a", encoding="utf-8") as handle:
            handle.write(entry)
        print(f"Appended {args.log}")


if __name__ == "__main__":
    main()