# inire/scripts/diff_performance_baseline.py

#!/usr/bin/env python3
from __future__ import annotations

import argparse
import json
from dataclasses import asdict
from datetime import datetime
from pathlib import Path

from inire.tests.example_scenarios import PERFORMANCE_SCENARIO_SNAPSHOTS, SCENARIO_SNAPSHOTS
from inire.results import RouteMetrics


# Metrics included in the report by default, i.e. when no ``--metric``
# option is passed on the command line (see ``main``).
SUMMARY_KEYS = (
    "duration_s",
    "valid_results",
    "reached_targets",
    "route_iterations",
    "nets_routed",
    "nodes_expanded",
    "ray_cast_calls",
    "moves_generated",
    "moves_added",
    "congestion_check_calls",
    "verify_path_report_calls",
)


def _snapshot_registry(include_performance_only: bool) -> tuple[tuple[str, object], ...]:
    """Return the ``(name, runner)`` pairs of the scenarios to measure.

    Args:
        include_performance_only: When true, the entries of
            ``PERFORMANCE_SCENARIO_SNAPSHOTS`` are appended after the
            entries of ``SCENARIO_SNAPSHOTS``.

    Returns:
        ``SCENARIO_SNAPSHOTS`` alone, or followed by the performance-only
        entries.
    """
    if include_performance_only:
        return SCENARIO_SNAPSHOTS + PERFORMANCE_SCENARIO_SNAPSHOTS
    return SCENARIO_SNAPSHOTS


def _available_metric_names() -> tuple[str, ...]:
    """List every metric name that ``_validate_metrics`` accepts.

    Returns:
        The four top-level snapshot values first, followed by the keys of
        ``RouteMetrics.__dataclass_fields__`` in that mapping's order.
    """
    top_level = ("duration_s", "total_results", "valid_results", "reached_targets")
    counters = tuple(RouteMetrics.__dataclass_fields__)
    return top_level + counters


def _current_snapshots(
    selected_scenarios: tuple[str, ...] | None,
    *,
    include_performance_only: bool,
) -> dict[str, dict[str, object]]:
    """Run the registered scenarios and collect their snapshots as dicts.

    Args:
        selected_scenarios: Scenario names to run, or ``None`` to run every
            registered scenario.
        include_performance_only: Forwarded to ``_snapshot_registry`` to
            decide which registry entries are considered.

    Returns:
        Mapping from scenario name to ``asdict`` of whatever the scenario's
        runner returned. Scenarios filtered out by name are never executed.
    """
    wanted = set(selected_scenarios) if selected_scenarios is not None else None
    return {
        scenario_name: asdict(runner())
        for scenario_name, runner in _snapshot_registry(include_performance_only)
        if wanted is None or scenario_name in wanted
    }


def _load_baseline(path: Path, selected_scenarios: tuple[str, ...] | None) -> dict[str, dict[str, object]]:
    payload = json.loads(path.read_text())
    allowed = None if selected_scenarios is None else set(selected_scenarios)
    return {
        entry["name"]: entry
        for entry in payload["scenarios"]
        if allowed is None or entry["name"] in allowed
    }


def _metric_value(snapshot: dict[str, object], key: str) -> float | None:
    if key in {"duration_s", "total_results", "valid_results", "reached_targets"}:
        return float(snapshot[key])
    if key not in snapshot["metrics"]:
        return None
    return float(snapshot["metrics"][key])


def _validate_metrics(metric_names: tuple[str, ...]) -> None:
    """Abort the program if any requested metric name is not recognised.

    Args:
        metric_names: Metric names to check against
            ``_available_metric_names()``.

    Raises:
        SystemExit: With a message listing the unknown names (sorted) and
            all valid names, when at least one name is unknown.
    """
    known = set(_available_metric_names())
    rejected = sorted(name for name in metric_names if name not in known)
    if not rejected:
        return
    raise SystemExit(
        f"Unknown metric name(s): {', '.join(rejected)}. "
        f"Valid names are: {', '.join(_available_metric_names())}"
    )


def _render_report(
    baseline: dict[str, dict[str, object]],
    current: dict[str, dict[str, object]],
    metric_names: tuple[str, ...],
) -> str:
    scenario_names = sorted(set(baseline) | set(current))
    lines = [
        "# Performance Baseline Diff",
        "",
        "| Scenario | Metric | Baseline | Current | Delta |",
        "| :-- | :-- | --: | --: | --: |",
    ]
    for scenario in scenario_names:
        base_snapshot = baseline.get(scenario)
        curr_snapshot = current.get(scenario)
        if base_snapshot is None:
            if curr_snapshot is None:
                lines.append(f"| {scenario} | added | - | - | - |")
                continue
            for key in metric_names:
                curr_value = _metric_value(curr_snapshot, key)
                if curr_value is None:
                    lines.append(f"| {scenario} | {key} | - | - | - |")
                    continue
                lines.append(f"| {scenario} | {key} | - | {curr_value:.4f} | - |")
            continue
        if curr_snapshot is None:
            lines.append(f"| {scenario} | missing | - | - | - |")
            continue
        for key in metric_names:
            base_value = _metric_value(base_snapshot, key)
            curr_value = _metric_value(curr_snapshot, key)
            if base_value is None:
                lines.append(
                    f"| {scenario} | {key} | - | {curr_value:.4f} | - |"
                )
                continue
            if curr_value is None:
                lines.append(
                    f"| {scenario} | {key} | {base_value:.4f} | - | - |"
                )
                continue
            lines.append(
                f"| {scenario} | {key} | {base_value:.4f} | {curr_value:.4f} | {curr_value - base_value:+.4f} |"
            )
    return "\n".join(lines) + "\n"


def _render_log_entry(
    *,
    baseline_path: Path,
    label: str,
    notes: tuple[str, ...],
    report: str,
) -> str:
    """Build the Markdown section that gets appended to the log file.

    Args:
        baseline_path: Baseline file the report was compared against; shown
            in the section header.
        label: Text of the section's ``##`` heading.
        notes: Findings rendered as a bullet list; the list is omitted
            entirely when empty.
        report: Rendered report; trailing whitespace is stripped before it
            is embedded.

    Returns:
        The section text, ending with a single newline. The measurement
        timestamp is the local time at the moment of the call.
    """
    measured_at = datetime.now().astimezone().isoformat(timespec="seconds")
    header = [
        f"## {label}",
        "",
        f"Measured on {measured_at}.",
        f"Baseline: `{baseline_path}`.",
        "",
    ]
    findings = ["Findings:", "", *(f"- {note}" for note in notes), ""] if notes else []
    # The trailing empty item makes the joined text end with a newline.
    return "\n".join([*header, *findings, report.rstrip(), ""])


def main() -> None:
    """Command-line entry point: diff the baseline against a fresh run.

    Parses the command line, validates the requested metric names, loads
    the baseline, runs the selected scenarios and renders the comparison.
    The report is written to ``--output`` when given; otherwise it is
    printed to stdout unless ``--log`` is given. With ``--log`` a labelled
    section containing the report is appended to that file.

    Raises:
        SystemExit: On invalid command-line arguments or unknown metric
            names.
    """
    parser = argparse.ArgumentParser(description="Diff the committed performance baseline against a fresh run.")
    parser.add_argument(
        "--baseline",
        type=Path,
        default=Path("docs/performance_baseline.json"),
        help="Baseline JSON to compare against.",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=None,
        help="Optional file to write the report to. Defaults to stdout.",
    )
    parser.add_argument(
        "--scenario",
        action="append",
        dest="scenarios",
        default=[],
        help="Optional scenario name to include. May be passed more than once.",
    )
    parser.add_argument(
        "--metric",
        action="append",
        dest="metrics",
        default=[],
        help="Optional metric to include. May be passed more than once. Defaults to the summary metric set.",
    )
    parser.add_argument(
        "--label",
        default="Measurement",
        help="Section label to use when appending to a log file.",
    )
    parser.add_argument(
        "--notes",
        action="append",
        dest="notes",
        default=[],
        help="Optional short finding to append under the measurement section. May be passed more than once.",
    )
    parser.add_argument(
        "--log",
        type=Path,
        default=None,
        help="Optional Markdown log file to append the rendered report to.",
    )
    parser.add_argument(
        "--include-performance-only",
        action="store_true",
        help="Include performance-only snapshot scenarios that are excluded from the default baseline corpus.",
    )
    args = parser.parse_args()

    # An empty selection means "no filter", not "nothing".
    selected = tuple(args.scenarios) if args.scenarios else None
    metrics = tuple(args.metrics) if args.metrics else SUMMARY_KEYS
    # Reject unknown metric names before the scenarios are run.
    _validate_metrics(metrics)
    baseline = _load_baseline(args.baseline, selected)
    current = _current_snapshots(selected, include_performance_only=args.include_performance_only)
    report = _render_report(baseline, current, metrics)

    if args.output is not None:
        # Same explicit encoding as the log file below, so the output does
        # not depend on the platform's locale.
        args.output.write_text(report, encoding="utf-8")
        print(f"Wrote {args.output}")
    elif args.log is None:
        # Only echo the report when it is not being stored anywhere.
        print(report, end="")

    if args.log is not None:
        entry = _render_log_entry(
            baseline_path=args.baseline,
            label=args.label,
            notes=tuple(args.notes),
            report=report,
        )
        with args.log.open("a", encoding="utf-8") as handle:
            handle.write(entry)
        print(f"Appended {args.log}")

# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
more perf counters 2026-03-31 17:41:15 -07:00			`#!/usr/bin/env python3`
			`from __future__ import annotations`

			`import argparse`
			`import json`
			`from dataclasses import asdict`
various performance work (wip) 2026-04-01 21:29:23 -07:00			`from datetime import datetime`
more perf counters 2026-03-31 17:41:15 -07:00			`from pathlib import Path`

various performance work (wip) 2026-04-01 21:29:23 -07:00			`from inire.tests.example_scenarios import PERFORMANCE_SCENARIO_SNAPSHOTS, SCENARIO_SNAPSHOTS`
			`from inire.results import RouteMetrics`
more perf counters 2026-03-31 17:41:15 -07:00

			`SUMMARY_KEYS = (`
			`"duration_s",`
various performance work (wip) 2026-04-01 21:29:23 -07:00			`"valid_results",`
			`"reached_targets",`
more perf counters 2026-03-31 17:41:15 -07:00			`"route_iterations",`
			`"nets_routed",`
			`"nodes_expanded",`
			`"ray_cast_calls",`
			`"moves_generated",`
			`"moves_added",`
			`"congestion_check_calls",`
			`"verify_path_report_calls",`
			`)`


various performance work (wip) 2026-04-01 21:29:23 -07:00			`def _snapshot_registry(include_performance_only: bool) -> tuple[tuple[str, object], ...]:`
			`if not include_performance_only:`
			`return SCENARIO_SNAPSHOTS`
			`return SCENARIO_SNAPSHOTS + PERFORMANCE_SCENARIO_SNAPSHOTS`


			`def _available_metric_names() -> tuple[str, ...]:`
			`return (`
			`"duration_s",`
			`"total_results",`
			`"valid_results",`
			`"reached_targets",`
			`*RouteMetrics.__dataclass_fields__.keys(),`
			`)`


			`def _current_snapshots(`
			`selected_scenarios: tuple[str, ...] \| None,`
			`*,`
			`include_performance_only: bool,`
			`) -> dict[str, dict[str, object]]:`
more perf counters 2026-03-31 17:41:15 -07:00			`allowed = None if selected_scenarios is None else set(selected_scenarios)`
			`snapshots: dict[str, dict[str, object]] = {}`
various performance work (wip) 2026-04-01 21:29:23 -07:00			`for name, run in _snapshot_registry(include_performance_only):`
more perf counters 2026-03-31 17:41:15 -07:00			`if allowed is not None and name not in allowed:`
			`continue`
			`snapshots[name] = asdict(run())`
			`return snapshots`


			`def _load_baseline(path: Path, selected_scenarios: tuple[str, ...] \| None) -> dict[str, dict[str, object]]:`
			`payload = json.loads(path.read_text())`
			`allowed = None if selected_scenarios is None else set(selected_scenarios)`
			`return {`
			`entry["name"]: entry`
			`for entry in payload["scenarios"]`
			`if allowed is None or entry["name"] in allowed`
			`}`


various performance work (wip) 2026-04-01 21:29:23 -07:00			`def _metric_value(snapshot: dict[str, object], key: str) -> float \| None:`
			`if key in {"duration_s", "total_results", "valid_results", "reached_targets"}:`
			`return float(snapshot[key])`
			`if key not in snapshot["metrics"]:`
			`return None`
more perf counters 2026-03-31 17:41:15 -07:00			`return float(snapshot["metrics"][key])`


various performance work (wip) 2026-04-01 21:29:23 -07:00			`def _validate_metrics(metric_names: tuple[str, ...]) -> None:`
			`valid_names = set(_available_metric_names())`
			`unknown = [name for name in metric_names if name not in valid_names]`
			`if unknown:`
			`raise SystemExit(`
			`f"Unknown metric name(s): {', '.join(sorted(unknown))}. "`
			`f"Valid names are: {', '.join(_available_metric_names())}"`
			`)`


			`def _render_report(`
			`baseline: dict[str, dict[str, object]],`
			`current: dict[str, dict[str, object]],`
			`metric_names: tuple[str, ...],`
			`) -> str:`
more perf counters 2026-03-31 17:41:15 -07:00			`scenario_names = sorted(set(baseline) \| set(current))`
			`lines = [`
			`"# Performance Baseline Diff",`
			`"",`
			`"\| Scenario \| Metric \| Baseline \| Current \| Delta \|",`
			`"\| :-- \| :-- \| --: \| --: \| --: \|",`
			`]`
			`for scenario in scenario_names:`
			`base_snapshot = baseline.get(scenario)`
			`curr_snapshot = current.get(scenario)`
			`if base_snapshot is None:`
various performance work (wip) 2026-04-01 21:29:23 -07:00			`if curr_snapshot is None:`
			`lines.append(f"\| {scenario} \| added \| - \| - \| - \|")`
			`continue`
			`for key in metric_names:`
			`curr_value = _metric_value(curr_snapshot, key)`
			`if curr_value is None:`
			`lines.append(f"\| {scenario} \| {key} \| - \| - \| - \|")`
			`continue`
			`lines.append(f"\| {scenario} \| {key} \| - \| {curr_value:.4f} \| - \|")`
more perf counters 2026-03-31 17:41:15 -07:00			`continue`
			`if curr_snapshot is None:`
			`lines.append(f"\| {scenario} \| missing \| - \| - \| - \|")`
			`continue`
various performance work (wip) 2026-04-01 21:29:23 -07:00			`for key in metric_names:`
more perf counters 2026-03-31 17:41:15 -07:00			`base_value = _metric_value(base_snapshot, key)`
			`curr_value = _metric_value(curr_snapshot, key)`
various performance work (wip) 2026-04-01 21:29:23 -07:00			`if base_value is None:`
			`lines.append(`
			`f"\| {scenario} \| {key} \| - \| {curr_value:.4f} \| - \|"`
			`)`
			`continue`
			`if curr_value is None:`
			`lines.append(`
			`f"\| {scenario} \| {key} \| {base_value:.4f} \| - \| - \|"`
			`)`
			`continue`
more perf counters 2026-03-31 17:41:15 -07:00			`lines.append(`
			`f"\| {scenario} \| {key} \| {base_value:.4f} \| {curr_value:.4f} \| {curr_value - base_value:+.4f} \|"`
			`)`
			`return "\n".join(lines) + "\n"`


various performance work (wip) 2026-04-01 21:29:23 -07:00			`def _render_log_entry(`
			`*,`
			`baseline_path: Path,`
			`label: str,`
			`notes: tuple[str, ...],`
			`report: str,`
			`) -> str:`
			`lines = [`
			`f"## {label}",`
			`"",`
			`f"Measured on {datetime.now().astimezone().isoformat(timespec='seconds')}.",`
			f"Baseline: `{baseline_path}`.",
			`"",`
			`]`
			`if notes:`
			`lines.extend(["Findings:", ""])`
			`lines.extend(f"- {note}" for note in notes)`
			`lines.append("")`
			`lines.append(report.rstrip())`
			`lines.append("")`
			`return "\n".join(lines)`


more perf counters 2026-03-31 17:41:15 -07:00			`def main() -> None:`
			`parser = argparse.ArgumentParser(description="Diff the committed performance baseline against a fresh run.")`
			`parser.add_argument(`
			`"--baseline",`
			`type=Path,`
			`default=Path("docs/performance_baseline.json"),`
			`help="Baseline JSON to compare against.",`
			`)`
			`parser.add_argument(`
			`"--output",`
			`type=Path,`
			`default=None,`
			`help="Optional file to write the report to. Defaults to stdout.",`
			`)`
			`parser.add_argument(`
			`"--scenario",`
			`action="append",`
			`dest="scenarios",`
			`default=[],`
			`help="Optional scenario name to include. May be passed more than once.",`
			`)`
various performance work (wip) 2026-04-01 21:29:23 -07:00			`parser.add_argument(`
			`"--metric",`
			`action="append",`
			`dest="metrics",`
			`default=[],`
			`help="Optional metric to include. May be passed more than once. Defaults to the summary metric set.",`
			`)`
			`parser.add_argument(`
			`"--label",`
			`default="Measurement",`
			`help="Section label to use when appending to a log file.",`
			`)`
			`parser.add_argument(`
			`"--notes",`
			`action="append",`
			`dest="notes",`
			`default=[],`
			`help="Optional short finding to append under the measurement section. May be passed more than once.",`
			`)`
			`parser.add_argument(`
			`"--log",`
			`type=Path,`
			`default=None,`
			`help="Optional Markdown log file to append the rendered report to.",`
			`)`
			`parser.add_argument(`
			`"--include-performance-only",`
			`action="store_true",`
			`help="Include performance-only snapshot scenarios that are excluded from the default baseline corpus.",`
			`)`
more perf counters 2026-03-31 17:41:15 -07:00			`args = parser.parse_args()`

			`selected = tuple(args.scenarios) if args.scenarios else None`
various performance work (wip) 2026-04-01 21:29:23 -07:00			`metrics = tuple(args.metrics) if args.metrics else SUMMARY_KEYS`
			`_validate_metrics(metrics)`
more perf counters 2026-03-31 17:41:15 -07:00			`baseline = _load_baseline(args.baseline, selected)`
various performance work (wip) 2026-04-01 21:29:23 -07:00			`current = _current_snapshots(selected, include_performance_only=args.include_performance_only)`
			`report = _render_report(baseline, current, metrics)`
more perf counters 2026-03-31 17:41:15 -07:00
various performance work (wip) 2026-04-01 21:29:23 -07:00			`if args.output is not None:`
more perf counters 2026-03-31 17:41:15 -07:00			`args.output.write_text(report)`
			`print(f"Wrote {args.output}")`
various performance work (wip) 2026-04-01 21:29:23 -07:00			`elif args.log is None:`
			`print(report, end="")`

			`if args.log is not None:`
			`entry = _render_log_entry(`
			`baseline_path=args.baseline,`
			`label=args.label,`
			`notes=tuple(args.notes),`
			`report=report,`
			`)`
			`with args.log.open("a", encoding="utf-8") as handle:`
			`handle.write(entry)`
			`print(f"Appended {args.log}")`
more perf counters 2026-03-31 17:41:15 -07:00

			`if __name__ == "__main__":`
			`main()`