Build RE baseline and initial Rust workspace

2026-04-02 23:11:15 -07:00 · 2026-04-02 23:11:15 -07:00 · ffaf155ef0
commit ffaf155ef0
parent 8d1f280e2e
39 changed files with 5974 additions and 8 deletions
--- a/tools/py/collect_pe_artifacts.py
+++ b/tools/py/collect_pe_artifacts.py
@ -0,0 +1,314 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import csv
+import hashlib
+import json
+import re
+import subprocess
+import sys
+from pathlib import Path
+
+
+# Header field names that parse_summary() keeps from the `llvm-objdump -p`
+# output; every other "key value" line in that output is ignored.
+SUMMARY_KEYS = {
+    "Time/Date",
+    "Magic",
+    "AddressOfEntryPoint",
+    "ImageBase",
+    "Subsystem",
+    "SizeOfImage",
+    "SizeOfCode",
+    "SizeOfInitializedData",
+    "BaseOfCode",
+    "BaseOfData",
+}
+
+# Lowercase substrings used by interesting_strings(): a line of `strings`
+# output is kept when its lowercased text contains any of these.
+KEYWORDS = (
+    "rail",
+    "cargo",
+    "map",
+    "save",
+    "scenario",
+    "debug",
+    "bink",
+    "mss",
+    "direct",
+    "sound",
+    "video",
+    "d3d",
+)
+
+# Evidence table consumed by build_subsystem_inventory(). A subsystem gets a
+# section in the report when at least one of its "dlls" is among the imported
+# DLL names (exact, case-sensitive match) or at least one of its "strings"
+# markers occurs, case-insensitively, inside a collected interesting string.
+SUBSYSTEM_HINTS = {
+    "startup": {
+        "dlls": {"KERNEL32.dll", "ADVAPI32.dll"},
+        "strings": ("debug", "OutputDebugStringA"),
+    },
+    "ui": {
+        "dlls": {"USER32.dll", "comdlg32.dll", "GDI32.dll"},
+        "strings": ("MessageBoxA", "CreateWindowExA"),
+    },
+    "render": {
+        "dlls": {"d3d8.dll"},
+        "strings": ("Direct3D", "Hardware T & L", "Video.win"),
+    },
+    "audio": {
+        "dlls": {"mss32.dll", "DSOUND.dll"},
+        "strings": ("DirectSound", "PaintSound.win", "Data\\Sound"),
+    },
+    "input": {
+        "dlls": {"DINPUT8.dll"},
+        "strings": ("DirectInput8Create",),
+    },
+    "network": {
+        "dlls": {"WS2_32.dll", "WSOCK32.dll"},
+        "strings": ("Direct Play", "HOST version"),
+    },
+    "filesystem": {
+        "dlls": {"KERNEL32.dll"},
+        "strings": ("Saved Games", ".\\Maps\\", "MapViewOfFile"),
+    },
+    "resource": {
+        "dlls": {"VERSION.dll", "ole32.dll"},
+        "strings": ("CargoIcons", "CargoModels", ".imb"),
+    },
+    # The remaining subsystems have no DLL evidence; only string markers apply.
+    "map": {
+        "dlls": set(),
+        "strings": ("gptGameMap", "maps\\*.gmp", "Map Description"),
+    },
+    "scenario": {
+        "dlls": set(),
+        "strings": ("Scenario Text File", "Campaign Scenario"),
+    },
+    "save": {
+        "dlls": set(),
+        "strings": ("Quicksave", "Save Game:", "Saved Games"),
+    },
+}
+
+
+def run_command(*args: str) -> str:
+    return subprocess.run(
+        args,
+        check=True,
+        text=True,
+        capture_output=True,
+    ).stdout
+
+
+def sha256(path: Path) -> str:
+    digest = hashlib.sha256()
+    with path.open("rb") as handle:
+        for chunk in iter(lambda: handle.read(1024 * 1024), b""):
+            digest.update(chunk)
+    return digest.hexdigest()
+
+
+def parse_summary(text: str) -> dict[str, str]:
+    summary: dict[str, str] = {}
+    for line in text.splitlines():
+        line = line.strip()
+        if not line:
+            continue
+        match = re.match(r"([A-Za-z/]+)\s+(.+)", line)
+        if not match:
+            continue
+        key, value = match.groups()
+        if key in SUMMARY_KEYS:
+            summary[key] = value.strip()
+    return summary
+
+
+def parse_sections(text: str) -> list[dict[str, str]]:
+    sections: list[dict[str, str]] = []
+    lines = text.splitlines()
+    for index, line in enumerate(lines):
+        match = re.match(
+            r"\s*(\d+)\s+(\S+)\s+([0-9a-fA-F]+)\s+([0-9a-fA-F]+)\s+([0-9a-fA-F]+)\s+([0-9a-fA-F]+)",
+            line,
+        )
+        if not match:
+            continue
+        idx, name, size, vma, lma, file_off = match.groups()
+        flags = lines[index + 1].strip() if index + 1 < len(lines) else ""
+        sections.append(
+            {
+                "idx": idx,
+                "name": name,
+                "size_hex": f"0x{size.lower()}",
+                "vma": f"0x{vma.lower()}",
+                "lma": f"0x{lma.lower()}",
+                "file_offset": f"0x{file_off.lower()}",
+                "flags": flags,
+            }
+        )
+    return sections
+
+
+def parse_imports(text: str) -> tuple[list[str], list[dict[str, str]]]:
+    dlls: list[str] = []
+    functions: list[dict[str, str]] = []
+    current_dll = ""
+    in_imports = False
+    for raw_line in text.splitlines():
+        line = raw_line.rstrip()
+        dll_match = re.match(r"\s*DLL Name:\s+(.+)", line)
+        if dll_match:
+            current_dll = dll_match.group(1).strip()
+            dlls.append(current_dll)
+            in_imports = False
+            continue
+        if "Hint/Ord" in line and "Name" in line:
+            in_imports = True
+            continue
+        if not in_imports or not current_dll:
+            continue
+        fn_match = re.match(r"\s*([0-9]+)\s+(.+)", line)
+        if fn_match:
+            hint, name = fn_match.groups()
+            functions.append({"dll": current_dll, "hint": hint, "name": name.strip()})
+        elif line.strip() == "":
+            in_imports = False
+    return dlls, functions
+
+
+def interesting_strings(text: str) -> list[str]:
+    hits: list[str] = []
+    seen: set[str] = set()
+    for line in text.splitlines():
+        lowered = line.lower()
+        if any(keyword in lowered for keyword in KEYWORDS):
+            stripped = line.strip()
+            if stripped and stripped not in seen:
+                hits.append(stripped)
+                seen.add(stripped)
+    return hits
+
+
+def build_subsystem_inventory(dlls: list[str], strings_found: list[str]) -> str:
+    present_dlls = set(dlls)
+    lower_strings = [entry.lower() for entry in strings_found]
+    lines = ["# Starter Subsystem Inventory", ""]
+    for name, hints in SUBSYSTEM_HINTS.items():
+        matched_dlls = sorted(present_dlls.intersection(hints["dlls"]))
+        matched_strings = [
+            entry
+            for entry in strings_found
+            if any(marker.lower() in entry.lower() for marker in hints["strings"])
+        ]
+        if not matched_dlls and not matched_strings:
+            continue
+        evidence = []
+        if matched_dlls:
+            evidence.append("DLLs: " + ", ".join(matched_dlls))
+        if matched_strings:
+            evidence.append("strings: " + "; ".join(matched_strings[:4]))
+        lines.append(f"## {name}")
+        lines.append("")
+        lines.append("- Evidence: " + " | ".join(evidence))
+        lines.append("- Status: initial hypothesis only")
+        lines.append("")
+    unknown_count = sum(1 for entry in lower_strings if "debug" in entry or "map" in entry)
+    lines.append("## unknown")
+    lines.append("")
+    lines.append(f"- Evidence: {unknown_count} broad strings still need manual triage")
+    lines.append("- Status: expected until GUI analysis identifies real call sites")
+    lines.append("")
+    return "\n".join(lines)
+
+
+def write_csv(path: Path, fieldnames: list[str], rows: list[dict[str, str]]) -> None:
+    with path.open("w", newline="", encoding="utf-8") as handle:
+        writer = csv.DictWriter(handle, fieldnames=fieldnames)
+        writer.writeheader()
+        writer.writerows(rows)
+
+
+def main() -> int:
+    """Collect baseline artifacts for one executable into an output directory.
+
+    Expects exactly two command-line arguments: the executable path and the
+    output directory (created if missing). Runs ``file``, ``objdump -h``,
+    ``llvm-objdump -p`` and ``strings -n 4`` on the executable and writes
+    ``binary-summary.json``, ``sections.csv``, ``imported-dlls.txt``,
+    ``imported-functions.csv``, ``interesting-strings.txt``,
+    ``subsystem-inventory.md`` and a one-row seed ``function-map.csv``.
+
+    Returns 2 on a usage error and 0 on success. A failing external tool
+    raises ``subprocess.CalledProcessError`` (via ``run_command``).
+    """
+    if len(sys.argv) != 3:
+        print("usage: collect_pe_artifacts.py <exe-path> <output-dir>", file=sys.stderr)
+        return 2
+
+    exe_path = Path(sys.argv[1]).resolve()
+    out_dir = Path(sys.argv[2]).resolve()
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # Raw tool output; all parsing below works on these captured texts.
+    file_output = run_command("file", str(exe_path)).strip()
+    section_output = run_command("objdump", "-h", str(exe_path))
+    pe_output = run_command("llvm-objdump", "-p", str(exe_path))
+    strings_output = run_command("strings", "-n", "4", str(exe_path))
+
+    summary = parse_summary(pe_output)
+    sections = parse_sections(section_output)
+    dlls, functions = parse_imports(pe_output)
+    strings_found = interesting_strings(strings_output)
+
+    summary_payload = {
+        "path": str(exe_path),
+        "sha256": sha256(exe_path),
+        "size_bytes": exe_path.stat().st_size,
+        "file": file_output,
+        "summary": summary,
+        "imported_dll_count": len(dlls),
+        "imported_function_count": len(functions),
+    }
+
+    (out_dir / "binary-summary.json").write_text(
+        json.dumps(summary_payload, indent=2, sort_keys=True) + "\n",
+        encoding="utf-8",
+    )
+    write_csv(
+        out_dir / "sections.csv",
+        ["idx", "name", "size_hex", "vma", "lma", "file_offset", "flags"],
+        sections,
+    )
+    (out_dir / "imported-dlls.txt").write_text("\n".join(dlls) + "\n", encoding="utf-8")
+    write_csv(out_dir / "imported-functions.csv", ["dll", "hint", "name"], functions)
+    (out_dir / "interesting-strings.txt").write_text(
+        "\n".join(strings_found) + "\n",
+        encoding="utf-8",
+    )
+    (out_dir / "subsystem-inventory.md").write_text(
+        build_subsystem_inventory(dlls, strings_found),
+        encoding="utf-8",
+    )
+
+    # Entry point VA = ImageBase + AddressOfEntryPoint, both parsed as hex.
+    # If either field was not found in the header dump, the seed row below is
+    # written with an empty address.
+    entry_rva = summary.get("AddressOfEntryPoint", "")
+    image_base = summary.get("ImageBase", "")
+    entry_va = ""
+    if entry_rva and image_base:
+        entry_va = hex(int(entry_rva, 16) + int(image_base, 16))
+    # Seed the function ledger with a single row for the entry point; size is
+    # intentionally left blank.
+    write_csv(
+        out_dir / "function-map.csv",
+        [
+            "address",
+            "size",
+            "name",
+            "subsystem",
+            "calling_convention",
+            "prototype_status",
+            "source_tool",
+            "confidence",
+            "notes",
+            "verified_against",
+        ],
+        [
+            {
+                "address": entry_va,
+                "size": "",
+                "name": "entrypoint_1_06",
+                "subsystem": "startup",
+                "calling_convention": "unknown",
+                "prototype_status": "unknown",
+                "source_tool": "llvm-objdump",
+                "confidence": "2",
+                "notes": "Seed row from PE header entrypoint; function boundary still needs GUI confirmation.",
+                "verified_against": f"sha256:{summary_payload['sha256']}",
+            }
+        ],
+    )
+
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/tools/py/export_analysis_context.py
+++ b/tools/py/export_analysis_context.py
@ -0,0 +1,522 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+import re
+import subprocess
+import sys
+from bisect import bisect_right
+from functools import lru_cache
+from pathlib import Path
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        description="Export reusable address and string context for focused RT3 analysis passes."
+    )
+    parser.add_argument("exe_path", type=Path)
+    parser.add_argument("output_dir", type=Path)
+    parser.add_argument(
+        "--addr",
+        action="append",
+        default=[],
+        help="Function or code address in hex. May be repeated.",
+    )
+    parser.add_argument(
+        "--string",
+        action="append",
+        default=[],
+        help="String text to resolve. May be repeated.",
+    )
+    args = parser.parse_args()
+    if not args.addr and not args.string:
+        parser.error("at least one --addr or --string target is required")
+    return args
+
+
+def parse_hex(text: str) -> int:
+    value = text.strip().lower()
+    if value.startswith("0x"):
+        value = value[2:]
+    return int(value, 16)
+
+
+def fmt_addr(value: int) -> str:
+    return f"0x{value:08x}"
+
+
+def display_string(text: str) -> str:
+    return text.encode("unicode_escape").decode("ascii")
+
+
+def clean_json_payload(text: str) -> str:
+    stripped = text.strip()
+    if not stripped:
+        raise ValueError("rizin returned empty output")
+    starts = [index for index in (stripped.find("["), stripped.find("{")) if index >= 0]
+    if not starts:
+        raise ValueError("rizin did not return JSON")
+    return stripped[min(starts) :]
+
+
+def run_rizin_json(exe_path: Path, command: str) -> object:
+    result = subprocess.run(
+        [
+            "rizin",
+            "-q",
+            "-e",
+            "scr.color=false",
+            "-c",
+            command,
+            str(exe_path),
+        ],
+        check=True,
+        capture_output=True,
+        text=True,
+    )
+    return json.loads(clean_json_payload(result.stdout))
+
+
+def run_objdump_excerpt(exe_path: Path, address: int, radius: int = 0x20) -> str:
+    start = max(address - radius, 0)
+    stop = address + radius
+    result = subprocess.run(
+        [
+            "llvm-objdump",
+            "-d",
+            "--no-show-raw-insn",
+            f"--start-address={fmt_addr(start)}",
+            f"--stop-address={fmt_addr(stop)}",
+            str(exe_path),
+        ],
+        check=True,
+        capture_output=True,
+        text=True,
+    )
+    lines = [
+        line.rstrip()
+        for line in result.stdout.splitlines()
+        if re.match(r"^\s*[0-9a-fA-F]+:", line)
+    ]
+    return "\n".join(lines)
+
+
+def load_curated_rows(path: Path) -> dict[int, dict[str, str]]:
+    if not path.exists():
+        return {}
+    with path.open(newline="", encoding="utf-8") as handle:
+        rows = csv.DictReader(handle)
+        return {parse_hex(row["address"]): dict(row) for row in rows}
+
+
+class FunctionIndex:
+    def __init__(self, rows: list[dict[str, object]], curated_names: dict[int, str]):
+        self.rows = sorted(rows, key=lambda row: int(row["offset"]))
+        self.by_start = {int(row["offset"]): row for row in self.rows}
+        self.starts = [int(row["offset"]) for row in self.rows]
+        self.curated_names = curated_names
+
+    def get_exact(self, address: int) -> dict[str, object] | None:
+        return self.by_start.get(address)
+
+    def find_containing(self, address: int) -> dict[str, object] | None:
+        index = bisect_right(self.starts, address) - 1
+        if index < 0:
+            return None
+        row = self.rows[index]
+        start = int(row["offset"])
+        end = int(row.get("maxbound", start + int(row.get("size", 0))))
+        if start <= address < end:
+            return row
+        return None
+
+    def preferred_name(self, row: dict[str, object]) -> str:
+        start = int(row["offset"])
+        return self.curated_names.get(start, str(row["name"]))
+
+
+class ContextExporter:
+    def __init__(self, exe_path: Path, output_dir: Path):
+        self.exe_path = exe_path.resolve()
+        self.output_dir = output_dir.resolve()
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+        curated_map = self.output_dir / "function-map.csv"
+        self.curated_rows = load_curated_rows(curated_map)
+        self.curated_names = {
+            address: row["name"] for address, row in self.curated_rows.items()
+        }
+        self.function_index = FunctionIndex(
+            self.load_function_rows(),
+            self.curated_names,
+        )
+        self.strings = list(run_rizin_json(self.exe_path, "izzj"))
+        self.strings_by_addr = {int(entry["vaddr"]): entry for entry in self.strings}
+
+    def load_function_rows(self) -> list[dict[str, object]]:
+        rows = list(run_rizin_json(self.exe_path, "aaa; aflj"))
+        known_starts = {int(row["offset"]) for row in rows}
+        missing_curated = sorted(address for address in self.curated_names if address not in known_starts)
+        if not missing_curated:
+            return rows
+
+        define_cmd = "aaa; " + "; ".join(
+            f"af @ {fmt_addr(address)}" for address in missing_curated
+        ) + "; aflj"
+        return list(run_rizin_json(self.exe_path, define_cmd))
+
+    @lru_cache(maxsize=None)
+    def xrefs_to(self, address: int) -> list[dict[str, object]]:
+        return list(run_rizin_json(self.exe_path, f"aaa; axtj @ {fmt_addr(address)}"))
+
+    @lru_cache(maxsize=None)
+    def excerpt(self, address: int) -> str:
+        return run_objdump_excerpt(self.exe_path, address)
+
+    def fallback_function(self, address: int) -> dict[str, object] | None:
+        curated = self.curated_rows.get(address)
+        if curated is None:
+            return None
+
+        size = int(curated["size"])
+        return {
+            "offset": address,
+            "name": curated["name"],
+            "size": size,
+            "maxbound": address + size,
+            "calltype": curated["calling_convention"],
+            "signature": "",
+            "codexrefs": self.xrefs_to(address),
+            "callrefs": [],
+            "datarefs": [],
+            "synthetic": True,
+        }
+
+    def resolve_target_function(self, address: int) -> dict[str, object] | None:
+        exact = self.function_index.get_exact(address)
+        if exact is not None:
+            return exact
+
+        fallback = self.fallback_function(address)
+        if fallback is not None:
+            return fallback
+
+        return self.function_index.find_containing(address)
+
+    def resolve_string_matches(self, query: str) -> list[tuple[str, dict[str, object]]]:
+        exact = [entry for entry in self.strings if str(entry.get("string", "")) == query]
+        if exact:
+            return [("exact", entry) for entry in exact]
+        partial = [entry for entry in self.strings if query in str(entry.get("string", ""))]
+        return [("substring", entry) for entry in partial]
+
+    def format_callers(self, row: dict[str, object]) -> list[dict[str, object]]:
+        callers: list[dict[str, object]] = []
+        for ref in row.get("codexrefs", []):
+            if ref.get("type") != "CALL":
+                continue
+            call_site = int(ref["from"])
+            caller = self.function_index.find_containing(call_site)
+            callers.append(
+                {
+                    "call_site": call_site,
+                    "function": caller,
+                }
+            )
+        callers.sort(key=lambda entry: entry["call_site"])
+        return callers
+
+    def format_callees(self, row: dict[str, object]) -> list[dict[str, object]]:
+        callees: list[dict[str, object]] = []
+        seen: set[tuple[int, int]] = set()
+        for ref in row.get("callrefs", []):
+            if ref.get("type") != "CALL":
+                continue
+            call_site = int(ref["from"])
+            callee_site = int(ref["to"])
+            callee = self.function_index.find_containing(callee_site)
+            if callee is None:
+                continue
+            key = (call_site, int(callee["offset"]))
+            if key in seen:
+                continue
+            seen.add(key)
+            callees.append(
+                {
+                    "call_site": call_site,
+                    "function": callee,
+                }
+            )
+        callees.sort(key=lambda entry: (int(entry["function"]["offset"]), entry["call_site"]))
+        return callees
+
+    def format_data_refs(self, row: dict[str, object]) -> list[dict[str, object]]:
+        refs: list[dict[str, object]] = []
+        seen: set[tuple[int, int, str]] = set()
+        for ref in row.get("datarefs", []):
+            from_addr = int(ref["from"])
+            to_addr = int(ref["to"])
+            ref_type = str(ref.get("type", "DATA"))
+            key = (from_addr, to_addr, ref_type)
+            if key in seen:
+                continue
+            seen.add(key)
+            refs.append(
+                {
+                    "from": from_addr,
+                    "to": to_addr,
+                    "type": ref_type,
+                    "string": self.strings_by_addr.get(to_addr),
+                }
+            )
+        refs.sort(key=lambda entry: (entry["to"], entry["from"]))
+        return refs
+
+    def build_function_rows(self, targets: list[int]) -> list[dict[str, str]]:
+        rows: list[dict[str, str]] = []
+        for query_address in sorted(dict.fromkeys(targets)):
+            function = self.resolve_target_function(query_address)
+            if function is None:
+                raise ValueError(f"no function found for {fmt_addr(query_address)}")
+
+            callers = self.format_callers(function)
+            callees = self.format_callees(function)
+            data_refs = self.format_data_refs(function)
+
+            rows.append(
+                {
+                    "query_address": fmt_addr(query_address),
+                    "function_address": fmt_addr(int(function["offset"])),
+                    "name": self.function_index.preferred_name(function),
+                    "size": str(function["size"]),
+                    "calling_convention": str(function.get("calltype", "unknown")),
+                    "signature": str(function.get("signature", "")),
+                    "caller_count": str(len(callers)),
+                    "callers": "; ".join(
+                        self.describe_caller(entry["call_site"], entry["function"])
+                        for entry in callers
+                    ),
+                    "callee_count": str(len(callees)),
+                    "callees": "; ".join(
+                        self.describe_callee(entry["call_site"], entry["function"])
+                        for entry in callees
+                    ),
+                    "data_ref_count": str(len(data_refs)),
+                    "data_refs": "; ".join(self.describe_data_ref(entry) for entry in data_refs),
+                    "entry_excerpt": self.excerpt(int(function["offset"])).replace("\n", " | "),
+                }
+            )
+        return rows
+
+    def build_string_rows(self, targets: list[str]) -> list[dict[str, str]]:
+        rows: list[dict[str, str]] = []
+        for query in targets:
+            matches = self.resolve_string_matches(query)
+            if not matches:
+                raise ValueError(f"no string match found for {query!r}")
+
+            for match_kind, string_entry in matches:
+                address = int(string_entry["vaddr"])
+                xrefs = self.xrefs_to(address)
+                rows.append(
+                    {
+                        "query_text": query,
+                        "match_kind": match_kind,
+                        "string_address": fmt_addr(address),
+                        "string_text": display_string(str(string_entry["string"])),
+                        "xref_count": str(len(xrefs)),
+                        "xrefs": "; ".join(self.describe_string_xref(entry) for entry in xrefs),
+                    }
+                )
+        rows.sort(key=lambda row: (row["query_text"], row["string_address"]))
+        return rows
+
+    def describe_caller(self, call_site: int, function: dict[str, object] | None) -> str:
+        if function is None:
+            return fmt_addr(call_site)
+        return (
+            f"{fmt_addr(call_site)}@{fmt_addr(int(function['offset']))}:"
+            f"{self.function_index.preferred_name(function)}"
+        )
+
+    def describe_callee(self, call_site: int, function: dict[str, object] | None) -> str:
+        if function is None:
+            return fmt_addr(call_site)
+        return (
+            f"{fmt_addr(call_site)}->{fmt_addr(int(function['offset']))}:"
+            f"{self.function_index.preferred_name(function)}"
+        )
+
+    def describe_data_ref(self, entry: dict[str, object]) -> str:
+        target = fmt_addr(int(entry["to"]))
+        string_entry = entry["string"]
+        if string_entry is not None:
+            target += f':"{display_string(str(string_entry["string"]))}"'
+        return f"{fmt_addr(int(entry['from']))}->{target}"
+
+    def describe_string_xref(self, entry: dict[str, object]) -> str:
+        from_addr = int(entry["from"])
+        ref_type = str(entry.get("type", "DATA"))
+        function = self.function_index.find_containing(from_addr)
+        if function is None:
+            return f"{fmt_addr(from_addr)}:{ref_type}"
+        return (
+            f"{fmt_addr(from_addr)}@{fmt_addr(int(function['offset']))}:"
+            f"{self.function_index.preferred_name(function)}:{ref_type}"
+        )
+
+    def write_csv(self, path: Path, rows: list[dict[str, str]]) -> None:
+        if not rows:
+            return
+        with path.open("w", newline="", encoding="utf-8") as handle:
+            writer = csv.DictWriter(handle, fieldnames=list(rows[0].keys()))
+            writer.writeheader()
+            writer.writerows(rows)
+
+    def write_markdown(self, function_targets: list[int], string_targets: list[str]) -> None:
+        lines = [
+            "# Analysis Context",
+            "",
+            f"- Target binary: `{self.exe_path}`",
+            "- Function names prefer the curated ledger when a committed mapping exists.",
+            "",
+        ]
+
+        if function_targets:
+            lines.extend(["## Function Targets", ""])
+            for query_address in sorted(dict.fromkeys(function_targets)):
+                function = self.resolve_target_function(query_address)
+                if function is None:
+                    continue
+                function_address = int(function["offset"])
+                callers = self.format_callers(function)
+                callees = self.format_callees(function)
+                data_refs = self.format_data_refs(function)
+
+                lines.append(
+                    f"### `{fmt_addr(query_address)}` -> `{fmt_addr(function_address)}` `{self.function_index.preferred_name(function)}`"
+                )
+                lines.append("")
+                lines.append(f"- Size: `{function['size']}`")
+                lines.append(f"- Calling convention: `{function.get('calltype', 'unknown')}`")
+                lines.append(f"- Signature: `{function.get('signature', '')}`")
+                lines.append("")
+                lines.append("Entry excerpt:")
+                lines.append("")
+                lines.append("```asm")
+                lines.append(self.excerpt(function_address))
+                lines.append("```")
+                lines.append("")
+                lines.append("Callers:")
+                for entry in callers:
+                    function_row = entry["function"]
+                    if function_row is None:
+                        lines.append(f"- `{fmt_addr(entry['call_site'])}`")
+                    else:
+                        lines.append(
+                            f"- `{fmt_addr(entry['call_site'])}` in `{fmt_addr(int(function_row['offset']))}` `{self.function_index.preferred_name(function_row)}`"
+                        )
+                if not callers:
+                    lines.append("- none")
+                lines.append("")
+                if callers:
+                    lines.append("Caller xref excerpts:")
+                    lines.append("")
+                    for entry in callers:
+                        lines.append(f"#### `{fmt_addr(entry['call_site'])}`")
+                        lines.append("")
+                        lines.append("```asm")
+                        lines.append(self.excerpt(entry["call_site"]))
+                        lines.append("```")
+                        lines.append("")
+
+                lines.append("Direct internal callees:")
+                for entry in callees:
+                    function_row = entry["function"]
+                    lines.append(
+                        f"- `{fmt_addr(entry['call_site'])}` -> `{fmt_addr(int(function_row['offset']))}` `{self.function_index.preferred_name(function_row)}`"
+                    )
+                if not callees:
+                    lines.append("- none")
+                lines.append("")
+
+                lines.append("Data refs:")
+                for entry in data_refs:
+                    target = fmt_addr(int(entry["to"]))
+                    string_entry = entry["string"]
+                    if string_entry is not None:
+                        lines.append(
+                            f'- `{fmt_addr(int(entry["from"]))}` -> `{target}` "{display_string(str(string_entry["string"]))}"'
+                        )
+                    else:
+                        lines.append(f"- `{fmt_addr(int(entry['from']))}` -> `{target}`")
+                if not data_refs:
+                    lines.append("- none")
+                lines.append("")
+
+        if string_targets:
+            lines.extend(["## String Targets", ""])
+            for query in string_targets:
+                matches = self.resolve_string_matches(query)
+                for match_kind, string_entry in matches:
+                    address = int(string_entry["vaddr"])
+                    xrefs = self.xrefs_to(address)
+                    lines.append(
+                        f"### `{query}` -> `{fmt_addr(address)}`"
+                    )
+                    lines.append("")
+                    lines.append(f"- Match kind: `{match_kind}`")
+                    lines.append(f'- String text: "{display_string(str(string_entry["string"]))}"')
+                    lines.append("")
+                    lines.append("Xrefs:")
+                    for entry in xrefs:
+                        from_addr = int(entry["from"])
+                        function = self.function_index.find_containing(from_addr)
+                        ref_type = str(entry.get("type", "DATA"))
+                        if function is None:
+                            lines.append(f"- `{fmt_addr(from_addr)}` `{ref_type}`")
+                        else:
+                            lines.append(
+                                f"- `{fmt_addr(from_addr)}` in `{fmt_addr(int(function['offset']))}` `{self.function_index.preferred_name(function)}` `{ref_type}`"
+                            )
+                    if not xrefs:
+                        lines.append("- none")
+                    lines.append("")
+
+                    if xrefs:
+                        lines.append("Xref excerpts:")
+                        lines.append("")
+                        for entry in xrefs:
+                            from_addr = int(entry["from"])
+                            lines.append(f"#### `{fmt_addr(from_addr)}`")
+                            lines.append("")
+                            lines.append("```asm")
+                            lines.append(self.excerpt(from_addr))
+                            lines.append("```")
+                            lines.append("")
+
+        (self.output_dir / "analysis-context.md").write_text(
+            "\n".join(lines) + "\n",
+            encoding="utf-8",
+        )
+
+
+def main() -> int:
+    args = parse_args()
+    exporter = ContextExporter(args.exe_path, args.output_dir)
+    function_targets = [parse_hex(value) for value in args.addr]
+    string_targets = list(args.string)
+
+    function_rows = exporter.build_function_rows(function_targets)
+    string_rows = exporter.build_string_rows(string_targets)
+
+    exporter.write_csv(exporter.output_dir / "analysis-context-functions.csv", function_rows)
+    exporter.write_csv(exporter.output_dir / "analysis-context-strings.csv", string_rows)
+    exporter.write_markdown(function_targets, string_targets)
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/tools/py/export_startup_map.py
+++ b/tools/py/export_startup_map.py
@ -0,0 +1,129 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import os
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+
+DEFAULT_DEPTH = "2"
+DEFAULT_ROOTS = [
+    ("entry", "0x005a313b"),
+    ("bootstrap", "0x00484440"),
+]
+
+
+def parse_args(argv: list[str]) -> tuple[Path, Path, str, list[tuple[str, str]]]:
+    if len(argv) < 3:
+        print(
+            "usage: export_startup_map.py <exe-path> <output-dir> [--depth N] [--root NAME:ADDR ...]",
+            file=sys.stderr,
+        )
+        raise SystemExit(2)
+
+    exe_path = Path(argv[1]).resolve()
+    output_dir = Path(argv[2]).resolve()
+    depth = DEFAULT_DEPTH
+    roots = list(DEFAULT_ROOTS)
+
+    index = 3
+    while index < len(argv):
+        token = argv[index]
+        if token == "--depth":
+            if index + 1 >= len(argv):
+                print("--depth requires a value", file=sys.stderr)
+                raise SystemExit(2)
+            depth = argv[index + 1]
+            index += 2
+            continue
+
+        if token == "--root":
+            if index + 1 >= len(argv):
+                print("--root requires NAME:ADDR", file=sys.stderr)
+                raise SystemExit(2)
+            root_spec = argv[index + 1]
+            if ":" not in root_spec:
+                print("--root value must be NAME:ADDR", file=sys.stderr)
+                raise SystemExit(2)
+            name, address = root_spec.split(":", 1)
+            if not name or not address:
+                print("--root value must be NAME:ADDR", file=sys.stderr)
+                raise SystemExit(2)
+            if roots == DEFAULT_ROOTS:
+                roots = []
+            roots.append((name, address))
+            index += 2
+            continue
+
+        print("unknown argument: %s" % token, file=sys.stderr)
+        raise SystemExit(2)
+
+    return exe_path, output_dir, depth, roots
+
+
+def main() -> int:
+    repo_root = Path(__file__).resolve().parents[2]
+    exe_path, output_dir, depth, roots = parse_args(sys.argv)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    ghidra_home = Path.home() / "software" / "ghidra"
+    analyze_headless = ghidra_home / "support" / "analyzeHeadless"
+    if not analyze_headless.exists():
+        print("analyzeHeadless not found at %s" % analyze_headless, file=sys.stderr)
+        return 1
+
+    project_dir = repo_root / "ghidra_projects"
+    project_dir.mkdir(parents=True, exist_ok=True)
+    project_name = "rt3-1.06"
+    script_path = repo_root / "tools" / "ghidra" / "scripts"
+    ghidra_runtime = repo_root / ".ghidra"
+    ghidra_home_dir = ghidra_runtime / "home"
+    ghidra_config_dir = ghidra_runtime / "config"
+    ghidra_cache_dir = ghidra_runtime / "cache"
+    ghidra_java_prefs = ghidra_runtime / "java-prefs"
+    ghidra_home_dir.mkdir(parents=True, exist_ok=True)
+    ghidra_config_dir.mkdir(parents=True, exist_ok=True)
+    ghidra_cache_dir.mkdir(parents=True, exist_ok=True)
+    ghidra_java_prefs.mkdir(parents=True, exist_ok=True)
+
+    java_bin = shutil.which("java")
+    if java_bin is None:
+        print("java not found on PATH", file=sys.stderr)
+        return 1
+    java_home = Path(java_bin).resolve().parents[1]
+
+    command = [
+        str(analyze_headless),
+        str(project_dir),
+        project_name,
+        "-import",
+        str(exe_path),
+        "-overwrite",
+        "-scriptPath",
+        str(script_path),
+        "-postScript",
+        "ExportStartupFunctions.java",
+        str(output_dir),
+        depth,
+    ]
+    command.extend(["%s:%s" % (name, address) for name, address in roots])
+    command.extend([
+        "-analysisTimeoutPerFile",
+        "600",
+    ])
+
+    env = os.environ.copy()
+    env["HOME"] = str(ghidra_home_dir)
+    env["XDG_CONFIG_HOME"] = str(ghidra_config_dir)
+    env["XDG_CACHE_HOME"] = str(ghidra_cache_dir)
+    env["JAVA_HOME"] = str(java_home)
+    env["_JAVA_OPTIONS"] = "-Djava.util.prefs.userRoot=%s" % ghidra_java_prefs
+
+    subprocess.run(command, check=True, env=env)
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/tools/py/rt3_rekit.py
+++ b/tools/py/rt3_rekit.py
@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+
+from rt3_rekitlib import (
+    PENDING_TEMPLATE_STORE_DEFAULT_SEEDS,
+    export_pending_template_store,
+    parse_hex,
+)
+
+
+def build_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        description="RT3 CLI RE kit for repeatable branch-deepening exports."
+    )
+    subparsers = parser.add_subparsers(dest="command", required=True)
+
+    pending = subparsers.add_parser(
+        "pending-template-store",
+        help="Export the pending-template dispatch-store dossier.",
+    )
+    pending.add_argument("exe_path", type=Path)
+    pending.add_argument("output_dir", type=Path)
+    pending.add_argument(
+        "--seed-addr",
+        action="append",
+        default=[],
+        help="Hex seed address. May be repeated.",
+    )
+    return parser
+
+
+def main() -> int:
+    parser = build_parser()
+    args = parser.parse_args()
+
+    if args.command == "pending-template-store":
+        seed_addresses = (
+            [parse_hex(value) for value in args.seed_addr]
+            if args.seed_addr
+            else list(PENDING_TEMPLATE_STORE_DEFAULT_SEEDS)
+        )
+        export_pending_template_store(
+            args.exe_path.resolve(),
+            args.output_dir.resolve(),
+            seed_addresses,
+        )
+        return 0
+
+    parser.error(f"unknown command: {args.command}")
+    return 2
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/tools/py/rt3_rekitlib.py
+++ b/tools/py/rt3_rekitlib.py
@ -0,0 +1,543 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import csv
+import json
+import re
+import subprocess
+from bisect import bisect_right
+from functools import lru_cache
+from pathlib import Path
+
+
+# Default seed function addresses for the pending-template store export; the
+# rt3_rekit CLI falls back to this list when no --seed-addr option is given.
+PENDING_TEMPLATE_STORE_DEFAULT_SEEDS = [
+    0x0059B2E0,
+    0x0059B710,
+    0x0059B740,
+    0x0059C470,
+    0x0059C540,
+    0x0059C590,
+    0x0059C5B0,
+    0x0059C5E0,
+    0x0059C5F0,
+]
+
+# Half-open address window [MIN, MAX): BranchAnalyzer.discover_adjacent_functions
+# only adds callers/callees whose start address lies inside it.
+PENDING_TEMPLATE_STORE_ADJACENT_MIN = 0x0059B000
+PENDING_TEMPLATE_STORE_ADJACENT_MAX = 0x0059D000
+# Function whose switch case labels BranchAnalyzer.build_record_kind_rows walks.
+DESTRUCTOR_SWITCH_ADDR = 0x0059B2E0
+# Jump/call target that BranchAnalyzer._infer_case_shape counts as a heap free.
+HEAP_FREE_ADDR = 0x0058F3C0
+
+
+def parse_hex(text: str) -> int:
+    value = text.strip().lower()
+    if value.startswith("0x"):
+        value = value[2:]
+    return int(value, 16)
+
+
+def fmt_addr(value: int) -> str:
+    return f"0x{value:08x}"
+
+
+def display_string(text: str) -> str:
+    return text.encode("unicode_escape").decode("ascii")
+
+
+def clean_json_payload(text: str) -> str:
+    stripped = text.strip()
+    if not stripped:
+        raise ValueError("rizin returned empty output")
+    starts = [index for index in (stripped.find("["), stripped.find("{")) if index >= 0]
+    if not starts:
+        raise ValueError("rizin did not return JSON")
+    return stripped[min(starts) :]
+
+
+def run_rizin_json(exe_path: Path, command: str) -> object:
+    result = subprocess.run(
+        [
+            "rizin",
+            "-q",
+            "-e",
+            "scr.color=false",
+            "-c",
+            command,
+            str(exe_path),
+        ],
+        check=True,
+        capture_output=True,
+        text=True,
+    )
+    return json.loads(clean_json_payload(result.stdout))
+
+
+def run_objdump_excerpt(exe_path: Path, address: int, radius: int = 0x20) -> str:
+    start = max(address - radius, 0)
+    stop = address + radius
+    result = subprocess.run(
+        [
+            "llvm-objdump",
+            "-d",
+            "--no-show-raw-insn",
+            f"--start-address={fmt_addr(start)}",
+            f"--stop-address={fmt_addr(stop)}",
+            str(exe_path),
+        ],
+        check=True,
+        capture_output=True,
+        text=True,
+    )
+    lines = [
+        line.rstrip()
+        for line in result.stdout.splitlines()
+        if re.match(r"^\s*[0-9a-fA-F]+:", line)
+    ]
+    return "\n".join(lines)
+
+
+def load_curated_rows(path: Path) -> dict[int, dict[str, str]]:
+    if not path.exists():
+        return {}
+    with path.open(newline="", encoding="utf-8") as handle:
+        rows = csv.DictReader(handle)
+        return {parse_hex(row["address"]): dict(row) for row in rows}
+
+
+class FunctionIndex:
+    def __init__(self, rows: list[dict[str, object]], curated_names: dict[int, str]):
+        self.rows = sorted(rows, key=lambda row: int(row["offset"]))
+        self.by_start = {int(row["offset"]): row for row in self.rows}
+        self.starts = [int(row["offset"]) for row in self.rows]
+        self.curated_names = curated_names
+
+    def get_exact(self, address: int) -> dict[str, object] | None:
+        return self.by_start.get(address)
+
+    def find_containing(self, address: int) -> dict[str, object] | None:
+        index = bisect_right(self.starts, address) - 1
+        if index < 0:
+            return None
+        row = self.rows[index]
+        start = int(row["offset"])
+        end = int(row.get("maxbound", start + int(row.get("size", 0))))
+        if start <= address < end:
+            return row
+        return None
+
+    def preferred_name(self, row: dict[str, object]) -> str:
+        start = int(row["offset"])
+        return self.curated_names.get(start, str(row["name"]))
+
+
+class BranchAnalyzer:
+    def __init__(self, exe_path: Path, output_dir: Path):
+        self.exe_path = exe_path.resolve()
+        self.output_dir = output_dir.resolve()
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+        curated_map = self.output_dir / "function-map.csv"
+        self.curated_rows = load_curated_rows(curated_map)
+        self.curated_names = {
+            address: row["name"] for address, row in self.curated_rows.items()
+        }
+        self.function_index = FunctionIndex(
+            self._load_function_rows(),
+            self.curated_names,
+        )
+        self.strings = list(run_rizin_json(self.exe_path, "izzj"))
+        self.strings_by_addr = {int(entry["vaddr"]): entry for entry in self.strings}
+
+    def _load_function_rows(self) -> list[dict[str, object]]:
+        rows = list(run_rizin_json(self.exe_path, "aaa; aflj"))
+        known_starts = {int(row["offset"]) for row in rows}
+        missing_curated = sorted(
+            address for address in self.curated_names if address not in known_starts
+        )
+        if not missing_curated:
+            return rows
+
+        define_cmd = "aaa; " + "; ".join(
+            f"af @ {fmt_addr(address)}" for address in missing_curated
+        ) + "; aflj"
+        return list(run_rizin_json(self.exe_path, define_cmd))
+
+    @lru_cache(maxsize=None)
+    def xrefs_to(self, address: int) -> list[dict[str, object]]:
+        return list(run_rizin_json(self.exe_path, f"aaa; axtj @ {fmt_addr(address)}"))
+
+    @lru_cache(maxsize=None)
+    def function_pdfj(self, address: int) -> dict[str, object]:
+        payload = run_rizin_json(self.exe_path, f"aaa; s {fmt_addr(address)}; pdfj")
+        if not isinstance(payload, dict):
+            raise TypeError(f"unexpected pdfj payload for {fmt_addr(address)}")
+        return payload
+
+    @lru_cache(maxsize=None)
+    def excerpt(self, address: int) -> str:
+        return run_objdump_excerpt(self.exe_path, address)
+
+    def fallback_function(self, address: int) -> dict[str, object] | None:
+        curated = self.curated_rows.get(address)
+        if curated is None:
+            return None
+
+        size = int(curated["size"])
+        return {
+            "offset": address,
+            "name": curated["name"],
+            "size": size,
+            "maxbound": address + size,
+            "calltype": curated["calling_convention"],
+            "signature": "",
+            "codexrefs": self.xrefs_to(address),
+            "callrefs": [],
+            "datarefs": [],
+            "synthetic": True,
+        }
+
+    def resolve_target_function(self, address: int) -> dict[str, object] | None:
+        exact = self.function_index.get_exact(address)
+        if exact is not None:
+            return exact
+
+        fallback = self.fallback_function(address)
+        if fallback is not None:
+            return fallback
+
+        return self.function_index.find_containing(address)
+
+    def format_callers(self, row: dict[str, object]) -> list[dict[str, object]]:
+        callers: list[dict[str, object]] = []
+        for ref in row.get("codexrefs", []):
+            if ref.get("type") != "CALL":
+                continue
+            call_site = int(ref["from"])
+            caller = self.function_index.find_containing(call_site)
+            callers.append({"call_site": call_site, "function": caller})
+        callers.sort(key=lambda entry: entry["call_site"])
+        return callers
+
+    def format_callees(self, row: dict[str, object]) -> list[dict[str, object]]:
+        callees: list[dict[str, object]] = []
+        seen: set[tuple[int, int]] = set()
+        for ref in row.get("callrefs", []):
+            if ref.get("type") != "CALL":
+                continue
+            call_site = int(ref["from"])
+            callee_site = int(ref["to"])
+            callee = self.function_index.find_containing(callee_site)
+            if callee is None:
+                continue
+            key = (call_site, int(callee["offset"]))
+            if key in seen:
+                continue
+            seen.add(key)
+            callees.append({"call_site": call_site, "function": callee})
+        callees.sort(key=lambda entry: (int(entry["function"]["offset"]), entry["call_site"]))
+        return callees
+
+    def format_data_refs(self, row: dict[str, object]) -> list[dict[str, object]]:
+        refs: list[dict[str, object]] = []
+        seen: set[tuple[int, int, str]] = set()
+        for ref in row.get("datarefs", []):
+            from_addr = int(ref["from"])
+            to_addr = int(ref["to"])
+            ref_type = str(ref.get("type", "DATA"))
+            key = (from_addr, to_addr, ref_type)
+            if key in seen:
+                continue
+            seen.add(key)
+            refs.append(
+                {
+                    "from": from_addr,
+                    "to": to_addr,
+                    "type": ref_type,
+                    "string": self.strings_by_addr.get(to_addr),
+                }
+            )
+        refs.sort(key=lambda entry: (entry["to"], entry["from"]))
+        return refs
+
+    def describe_caller(self, call_site: int, function: dict[str, object] | None) -> str:
+        if function is None:
+            return fmt_addr(call_site)
+        return (
+            f"{fmt_addr(call_site)}@{fmt_addr(int(function['offset']))}:"
+            f"{self.function_index.preferred_name(function)}"
+        )
+
+    def describe_callee(self, call_site: int, function: dict[str, object] | None) -> str:
+        if function is None:
+            return fmt_addr(call_site)
+        return (
+            f"{fmt_addr(call_site)}->{fmt_addr(int(function['offset']))}:"
+            f"{self.function_index.preferred_name(function)}"
+        )
+
+    def describe_data_ref(self, entry: dict[str, object]) -> str:
+        target = fmt_addr(int(entry["to"]))
+        string_entry = entry["string"]
+        if string_entry is not None:
+            target += f':"{display_string(str(string_entry["string"]))}"'
+        return f"{fmt_addr(int(entry['from']))}->{target}"
+
+    def collect_key_constants(self, pdfj: dict[str, object]) -> list[str]:
+        values: list[int] = []
+        seen: set[int] = set()
+        for op in pdfj.get("ops", []):
+            for key in ("val", "ptr"):
+                raw = op.get(key)
+                if not isinstance(raw, int):
+                    continue
+                if raw < 0x10 or raw > 0x100000:
+                    continue
+                if 0x00400000 <= raw <= 0x01000000:
+                    continue
+                if raw in seen:
+                    continue
+                seen.add(raw)
+                values.append(raw)
+        return [fmt_addr(value) for value in values[:10]]
+
+    def collect_key_strings(self, data_refs: list[dict[str, object]]) -> list[str]:
+        strings: list[str] = []
+        seen: set[str] = set()
+        for entry in data_refs:
+            string_entry = entry["string"]
+            if string_entry is None:
+                continue
+            text = display_string(str(string_entry["string"]))
+            if text in seen:
+                continue
+            seen.add(text)
+            strings.append(text)
+        return strings[:8]
+
+    def discover_adjacent_functions(self, addresses: list[int]) -> list[int]:
+        discovered = set(addresses)
+        for address in addresses:
+            function = self.resolve_target_function(address)
+            if function is None:
+                continue
+            for entry in self.format_callers(function):
+                caller = entry["function"]
+                if caller is None:
+                    continue
+                caller_start = int(caller["offset"])
+                if PENDING_TEMPLATE_STORE_ADJACENT_MIN <= caller_start < PENDING_TEMPLATE_STORE_ADJACENT_MAX:
+                    discovered.add(caller_start)
+            for entry in self.format_callees(function):
+                callee = entry["function"]
+                callee_start = int(callee["offset"])
+                if PENDING_TEMPLATE_STORE_ADJACENT_MIN <= callee_start < PENDING_TEMPLATE_STORE_ADJACENT_MAX:
+                    discovered.add(callee_start)
+        return sorted(discovered)
+
+    def build_function_rows(self, addresses: list[int]) -> list[dict[str, str]]:
+        rows: list[dict[str, str]] = []
+        for query_address in self.discover_adjacent_functions(addresses):
+            function = self.resolve_target_function(query_address)
+            if function is None:
+                continue
+            callers = self.format_callers(function)
+            callees = self.format_callees(function)
+            data_refs = self.format_data_refs(function)
+            pdfj = self.function_pdfj(int(function["offset"]))
+            rows.append(
+                {
+                    "query_address": fmt_addr(query_address),
+                    "function_address": fmt_addr(int(function["offset"])),
+                    "name": self.function_index.preferred_name(function),
+                    "size": str(function["size"]),
+                    "calling_convention": str(function.get("calltype", "unknown")),
+                    "caller_count": str(len(callers)),
+                    "callers": "; ".join(
+                        self.describe_caller(entry["call_site"], entry["function"])
+                        for entry in callers
+                    ),
+                    "callee_count": str(len(callees)),
+                    "callees": "; ".join(
+                        self.describe_callee(entry["call_site"], entry["function"])
+                        for entry in callees
+                    ),
+                    "data_ref_count": str(len(data_refs)),
+                    "data_refs": "; ".join(self.describe_data_ref(entry) for entry in data_refs),
+                    "key_constants": "; ".join(self.collect_key_constants(pdfj)),
+                    "key_strings": "; ".join(self.collect_key_strings(data_refs)),
+                    "entry_excerpt": self.excerpt(int(function["offset"])).replace("\n", " | "),
+                }
+            )
+        return rows
+
+    def _case_groups(self, switch_address: int) -> list[dict[str, object]]:
+        pdfj = self.function_pdfj(switch_address)
+        groups: list[dict[str, object]] = []
+        current: dict[str, object] | None = None
+        case_pattern = re.compile(r"^case\.0x[0-9a-f]+\.(\d+)$")
+        default_pattern = re.compile(r"^case\.default\.0x[0-9a-f]+$")
+        for op in pdfj.get("ops", []):
+            labels: list[str] = []
+            for flag in op.get("flags", []):
+                match = case_pattern.match(flag)
+                if match:
+                    labels.append(match.group(1))
+                    continue
+                if default_pattern.match(flag):
+                    labels.append("default")
+            if labels:
+                current = {
+                    "start": int(op["offset"]),
+                    "cases": labels,
+                    "ops": [op],
+                }
+                groups.append(current)
+                continue
+            if current is not None:
+                current["ops"].append(op)
+        return groups
+
+    def _infer_case_shape(self, ops: list[dict[str, object]]) -> tuple[str, str, str]:
+        disasm_lines = [str(op["disasm"]) for op in ops]
+        text = "\n".join(disasm_lines)
+        direct_offsets = {
+            int(match.group(1), 16)
+            for match in re.finditer(r"\[(?:edi|eax|ecx)\+0x([0-9a-f]+)\]", text)
+        }
+        indexed_offsets = {
+            int(match.group(1), 16)
+            for match in re.finditer(r"\[(?:edi|eax|ecx)\+0x([0-9a-f]+)\]", text)
+            if "*4" in text
+        }
+        free_calls = sum(
+            1
+            for op in ops
+            if op.get("jump") == HEAP_FREE_ADDR or "fcn.0058f3c0" in str(op.get("disasm", ""))
+        )
+        has_loop = any("*4" in line for line in disasm_lines)
+
+        fields = ["top-level payload"]
+        for offset in sorted(direct_offsets):
+            fields.append(f"payload+0x{offset:02x}")
+        if has_loop:
+            for offset in sorted(indexed_offsets):
+                fields.append(f"vector@payload+0x{offset:02x}")
+
+        unique_fields = []
+        seen_fields: set[str] = set()
+        for field in fields:
+            if field in seen_fields:
+                continue
+            seen_fields.add(field)
+            unique_fields.append(field)
+
+        if has_loop and len(direct_offsets) >= 3:
+            shape = "pointer vectors with paired side tables"
+        elif has_loop:
+            shape = "indexed pointer vector"
+        elif len(direct_offsets) >= 4:
+            shape = "fixed pointer tuple"
+        elif len(direct_offsets) >= 2:
+            shape = "paired nested pointers"
+        elif free_calls <= 1:
+            shape = "top-level payload pointer"
+        else:
+            shape = "single nested pointer"
+
+        cleanup_summary = f"{free_calls} heap free call(s)"
+        excerpt = " | ".join(disasm_lines[:8])
+        return shape, ", ".join(unique_fields), cleanup_summary + f"; {excerpt}"
+
+    def build_record_kind_rows(self) -> list[dict[str, str]]:
+        rows: list[dict[str, str]] = []
+        for group in self._case_groups(DESTRUCTOR_SWITCH_ADDR):
+            shape, freed_fields, notes = self._infer_case_shape(group["ops"])
+            case_group = ",".join(group["cases"])
+            for case_label in group["cases"]:
+                rows.append(
+                    {
+                        "record_kind": case_label,
+                        "case_group": case_group,
+                        "owning_function": fmt_addr(DESTRUCTOR_SWITCH_ADDR),
+                        "owning_name": self.curated_names.get(
+                            DESTRUCTOR_SWITCH_ADDR,
+                            "multiplayer_transport_destroy_pending_template_dispatch_record",
+                        ),
+                        "inferred_payload_shape": shape,
+                        "freed_fields": freed_fields,
+                        "notes": notes,
+                    }
+                )
+        rows.sort(
+            key=lambda row: (
+                row["record_kind"] == "default",
+                int(row["record_kind"]) if row["record_kind"] != "default" else 0,
+            )
+        )
+        return rows
+
+    def write_csv(self, path: Path, rows: list[dict[str, str]]) -> None:
+        if not rows:
+            return
+        with path.open("w", newline="", encoding="utf-8") as handle:
+            writer = csv.DictWriter(handle, fieldnames=list(rows[0].keys()))
+            writer.writeheader()
+            writer.writerows(rows)
+
+    def write_pending_template_store_markdown(self, function_rows: list[dict[str, str]]) -> None:
+        by_address = {parse_hex(row["function_address"]): row for row in function_rows}
+        sections = {
+            "Init": [0x0059B710, 0x0059C5B0],
+            "Destroy": [0x0059B2E0, 0x0059B740, 0x0059C5E0],
+            "Lookup": [0x0059C540, 0x0059C590],
+            "Prune / Remove": [0x0059C470],
+            "Dispatch / Update": [0x0059C220, 0x0059C5F0],
+        }
+
+        lines = [
+            "# Pending-Template Store Management",
+            "",
+            f"- Target binary: `{self.exe_path}`",
+            "- Scope: companion pending-template dispatch store and its adjacent management helpers.",
+            "",
+        ]
+
+        for title, addresses in sections.items():
+            lines.extend([f"## {title}", ""])
+            for address in addresses:
+                row = by_address.get(address)
+                if row is None:
+                    continue
+                lines.append(f"### `{row['function_address']}` `{row['name']}`")
+                lines.append("")
+                lines.append(f"- Size: `{row['size']}`")
+                lines.append(f"- Calling convention: `{row['calling_convention']}`")
+                lines.append(f"- Callers: {row['callers'] or 'none'}")
+                lines.append(f"- Direct callees: {row['callees'] or 'none'}")
+                lines.append(f"- Data refs: {row['data_refs'] or 'none'}")
+                lines.append(f"- Key constants: {row['key_constants'] or 'none'}")
+                lines.append(f"- Key strings: {row['key_strings'] or 'none'}")
+                lines.append("")
+                lines.append("Entry excerpt:")
+                lines.append("")
+                lines.append("```asm")
+                lines.append(self.excerpt(address))
+                lines.append("```")
+                lines.append("")
+
+        (self.output_dir / "pending-template-store-management.md").write_text(
+            "\n".join(lines) + "\n",
+            encoding="utf-8",
+        )
+
+
+def export_pending_template_store(
+    exe_path: Path,
+    output_dir: Path,
+    seed_addresses: list[int],
+) -> None:
+    analyzer = BranchAnalyzer(exe_path, output_dir)
+    function_rows = analyzer.build_function_rows(seed_addresses)
+    record_rows = analyzer.build_record_kind_rows()
+    analyzer.write_csv(output_dir / "pending-template-store-functions.csv", function_rows)
+    analyzer.write_csv(output_dir / "pending-template-store-record-kinds.csv", record_rows)
+    analyzer.write_pending_template_store_markdown(function_rows)