#!/usr/bin/env python3
"""Export analysis artifacts for the pending-template dispatch store.

Drives ``rizin`` (JSON commands) and ``llvm-objdump`` over a target executable
to document the companion pending-template dispatch store and its adjacent
management helpers.  Produces:

* ``pending-template-store-functions.csv`` — one row per discovered function
  (callers, callees, data refs, key constants/strings, entry excerpt);
* ``pending-template-store-record-kinds.csv`` — one row per destructor-switch
  case with an inferred payload shape;
* ``pending-template-store-management.md`` — a grouped markdown summary.
"""
from __future__ import annotations

import csv
import json
import re
import subprocess
from bisect import bisect_right
from functools import lru_cache  # NOTE(review): no longer used here; retained in case other chunks of this file rely on it.
from pathlib import Path

# Seed addresses of the pending-template store management functions.
PENDING_TEMPLATE_STORE_DEFAULT_SEEDS = [
    0x0059B2E0,
    0x0059B710,
    0x0059B740,
    0x0059C470,
    0x0059C540,
    0x0059C590,
    0x0059C5B0,
    0x0059C5E0,
    0x0059C5F0,
]
# Callers/callees whose start address falls in [MIN, MAX) are treated as
# adjacent helpers and pulled into the report alongside the seeds.
PENDING_TEMPLATE_STORE_ADJACENT_MIN = 0x0059B000
PENDING_TEMPLATE_STORE_ADJACENT_MAX = 0x0059D000
# Function containing the record-kind destructor switch.
DESTRUCTOR_SWITCH_ADDR = 0x0059B2E0
# Binary's heap-free routine; also matched textually as "fcn.0058f3c0".
HEAP_FREE_ADDR = 0x0058F3C0


def parse_hex(text: str) -> int:
    """Parse a hex address string, with or without a leading ``0x``."""
    value = text.strip().lower()
    if value.startswith("0x"):
        value = value[2:]
    return int(value, 16)


def fmt_addr(value: int) -> str:
    """Format *value* as a zero-padded 8-digit hex address (``0x0059b2e0``)."""
    return f"0x{value:08x}"


def display_string(text: str) -> str:
    """Return *text* with control characters escaped for one-line display."""
    return text.encode("unicode_escape").decode("ascii")


def clean_json_payload(text: str) -> str:
    """Strip any non-JSON preamble rizin prints before its JSON payload.

    Returns the substring starting at the first ``[`` or ``{``.

    Raises:
        ValueError: if the output is empty or contains no JSON start token.
    """
    stripped = text.strip()
    if not stripped:
        raise ValueError("rizin returned empty output")
    starts = [index for index in (stripped.find("["), stripped.find("{")) if index >= 0]
    if not starts:
        raise ValueError("rizin did not return JSON")
    return stripped[min(starts) :]


def run_rizin_json(exe_path: Path, command: str) -> object:
    """Run a rizin *command* against *exe_path* and parse its JSON output.

    Raises:
        subprocess.CalledProcessError: if rizin exits non-zero.
        ValueError / json.JSONDecodeError: if the output is not JSON.
    """
    result = subprocess.run(
        [
            "rizin",
            "-q",
            "-e",
            "scr.color=false",
            "-c",
            command,
            str(exe_path),
        ],
        check=True,
        capture_output=True,
        text=True,
    )
    return json.loads(clean_json_payload(result.stdout))


def run_objdump_excerpt(exe_path: Path, address: int, radius: int = 0x20) -> str:
    """Disassemble ``radius`` bytes around *address* with llvm-objdump.

    Only lines that look like instructions (``<addr>:`` prefix) are kept.
    """
    start = max(address - radius, 0)
    stop = address + radius
    result = subprocess.run(
        [
            "llvm-objdump",
            "-d",
            "--no-show-raw-insn",
            f"--start-address={fmt_addr(start)}",
            f"--stop-address={fmt_addr(stop)}",
            str(exe_path),
        ],
        check=True,
        capture_output=True,
        text=True,
    )
    lines = [
        line.rstrip()
        for line in result.stdout.splitlines()
        if re.match(r"^\s*[0-9a-fA-F]+:", line)
    ]
    return "\n".join(lines)


def load_curated_rows(path: Path) -> dict[int, dict[str, str]]:
    """Load the curated ``function-map.csv`` keyed by parsed address.

    Returns an empty mapping when the file does not exist.
    """
    if not path.exists():
        return {}
    with path.open(newline="", encoding="utf-8") as handle:
        rows = csv.DictReader(handle)
        return {parse_hex(row["address"]): dict(row) for row in rows}


class FunctionIndex:
    """Sorted index over rizin function rows with curated-name overrides."""

    def __init__(self, rows: list[dict[str, object]], curated_names: dict[int, str]):
        self.rows = sorted(rows, key=lambda row: int(row["offset"]))
        self.by_start = {int(row["offset"]): row for row in self.rows}
        self.starts = [int(row["offset"]) for row in self.rows]
        self.curated_names = curated_names

    def get_exact(self, address: int) -> dict[str, object] | None:
        """Return the function row starting exactly at *address*, if any."""
        return self.by_start.get(address)

    def find_containing(self, address: int) -> dict[str, object] | None:
        """Return the function row whose [start, end) range covers *address*."""
        index = bisect_right(self.starts, address) - 1
        if index < 0:
            return None
        row = self.rows[index]
        start = int(row["offset"])
        # Prefer rizin's "maxbound"; fall back to start + size.
        end = int(row.get("maxbound", start + int(row.get("size", 0))))
        if start <= address < end:
            return row
        return None

    def preferred_name(self, row: dict[str, object]) -> str:
        """Return the curated name for *row*'s start address, else rizin's."""
        start = int(row["offset"])
        return self.curated_names.get(start, str(row["name"]))


class BranchAnalyzer:
    """Collects call-graph, data-ref, and switch-case facts for the report."""

    def __init__(self, exe_path: Path, output_dir: Path):
        self.exe_path = exe_path.resolve()
        self.output_dir = output_dir.resolve()
        self.output_dir.mkdir(parents=True, exist_ok=True)
        # Per-instance memo caches.  NOTE(review): the original used
        # @lru_cache on these instance methods, which keys the cache on
        # ``self`` and keeps every analyzer instance alive (ruff B019);
        # plain dicts give the same memoization without the leak.
        self._xrefs_cache: dict[int, list[dict[str, object]]] = {}
        self._pdfj_cache: dict[int, dict[str, object]] = {}
        self._excerpt_cache: dict[int, str] = {}
        curated_map = self.output_dir / "function-map.csv"
        self.curated_rows = load_curated_rows(curated_map)
        self.curated_names = {
            address: row["name"] for address, row in self.curated_rows.items()
        }
        self.function_index = FunctionIndex(
            self._load_function_rows(),
            self.curated_names,
        )
        self.strings = list(run_rizin_json(self.exe_path, "izzj"))
        self.strings_by_addr = {int(entry["vaddr"]): entry for entry in self.strings}

    def _load_function_rows(self) -> list[dict[str, object]]:
        """Fetch rizin's function list, force-defining curated addresses it missed."""
        rows = list(run_rizin_json(self.exe_path, "aaa; aflj"))
        known_starts = {int(row["offset"]) for row in rows}
        missing_curated = sorted(
            address for address in self.curated_names if address not in known_starts
        )
        if not missing_curated:
            return rows
        # Define each missing curated function with "af", then re-list.
        define_cmd = "aaa; " + "; ".join(
            f"af @ {fmt_addr(address)}" for address in missing_curated
        ) + "; aflj"
        return list(run_rizin_json(self.exe_path, define_cmd))

    def xrefs_to(self, address: int) -> list[dict[str, object]]:
        """Return (and memoize) cross-references targeting *address* (``axtj``)."""
        if address not in self._xrefs_cache:
            self._xrefs_cache[address] = list(
                run_rizin_json(self.exe_path, f"aaa; axtj @ {fmt_addr(address)}")
            )
        return self._xrefs_cache[address]

    def function_pdfj(self, address: int) -> dict[str, object]:
        """Return (and memoize) the ``pdfj`` disassembly dict for *address*.

        Raises:
            TypeError: if rizin returns something other than a JSON object.
        """
        if address not in self._pdfj_cache:
            payload = run_rizin_json(self.exe_path, f"aaa; s {fmt_addr(address)}; pdfj")
            if not isinstance(payload, dict):
                raise TypeError(f"unexpected pdfj payload for {fmt_addr(address)}")
            self._pdfj_cache[address] = payload
        return self._pdfj_cache[address]

    def excerpt(self, address: int) -> str:
        """Return (and memoize) an objdump excerpt around *address*."""
        if address not in self._excerpt_cache:
            self._excerpt_cache[address] = run_objdump_excerpt(self.exe_path, address)
        return self._excerpt_cache[address]

    def fallback_function(self, address: int) -> dict[str, object] | None:
        """Synthesize a function row from curated data when rizin lacks one."""
        curated = self.curated_rows.get(address)
        if curated is None:
            return None
        size = int(curated["size"])
        return {
            "offset": address,
            "name": curated["name"],
            "size": size,
            "maxbound": address + size,
            "calltype": curated["calling_convention"],
            "signature": "",
            "codexrefs": self.xrefs_to(address),
            "callrefs": [],
            "datarefs": [],
            "synthetic": True,
        }

    def resolve_target_function(self, address: int) -> dict[str, object] | None:
        """Resolve *address* to a function row: exact, curated, or containing."""
        exact = self.function_index.get_exact(address)
        if exact is not None:
            return exact
        fallback = self.fallback_function(address)
        if fallback is not None:
            return fallback
        return self.function_index.find_containing(address)

    def format_callers(self, row: dict[str, object]) -> list[dict[str, object]]:
        """List CALL-type xrefs into *row*, resolved to their caller function."""
        callers: list[dict[str, object]] = []
        for ref in row.get("codexrefs", []):
            if ref.get("type") != "CALL":
                continue
            call_site = int(ref["from"])
            caller = self.function_index.find_containing(call_site)
            callers.append({"call_site": call_site, "function": caller})
        callers.sort(key=lambda entry: entry["call_site"])
        return callers

    def format_callees(self, row: dict[str, object]) -> list[dict[str, object]]:
        """List deduplicated CALL-type refs out of *row*, resolved to callees."""
        callees: list[dict[str, object]] = []
        seen: set[tuple[int, int]] = set()
        for ref in row.get("callrefs", []):
            if ref.get("type") != "CALL":
                continue
            call_site = int(ref["from"])
            callee_site = int(ref["to"])
            callee = self.function_index.find_containing(callee_site)
            if callee is None:
                continue
            key = (call_site, int(callee["offset"]))
            if key in seen:
                continue
            seen.add(key)
            callees.append({"call_site": call_site, "function": callee})
        callees.sort(key=lambda entry: (int(entry["function"]["offset"]), entry["call_site"]))
        return callees

    def format_data_refs(self, row: dict[str, object]) -> list[dict[str, object]]:
        """List deduplicated data refs out of *row*, annotated with any string."""
        refs: list[dict[str, object]] = []
        seen: set[tuple[int, int, str]] = set()
        for ref in row.get("datarefs", []):
            from_addr = int(ref["from"])
            to_addr = int(ref["to"])
            ref_type = str(ref.get("type", "DATA"))
            key = (from_addr, to_addr, ref_type)
            if key in seen:
                continue
            seen.add(key)
            refs.append(
                {
                    "from": from_addr,
                    "to": to_addr,
                    "type": ref_type,
                    "string": self.strings_by_addr.get(to_addr),
                }
            )
        refs.sort(key=lambda entry: (entry["to"], entry["from"]))
        return refs

    def describe_caller(self, call_site: int, function: dict[str, object] | None) -> str:
        """Render a caller as ``site@start:name`` (bare site if unresolved)."""
        if function is None:
            return fmt_addr(call_site)
        return (
            f"{fmt_addr(call_site)}@{fmt_addr(int(function['offset']))}:"
            f"{self.function_index.preferred_name(function)}"
        )

    def describe_callee(self, call_site: int, function: dict[str, object] | None) -> str:
        """Render a callee as ``site->start:name`` (bare site if unresolved)."""
        if function is None:
            return fmt_addr(call_site)
        return (
            f"{fmt_addr(call_site)}->{fmt_addr(int(function['offset']))}:"
            f"{self.function_index.preferred_name(function)}"
        )

    def describe_data_ref(self, entry: dict[str, object]) -> str:
        """Render a data ref as ``from->to`` plus any string at the target."""
        target = fmt_addr(int(entry["to"]))
        string_entry = entry["string"]
        if string_entry is not None:
            target += f':"{display_string(str(string_entry["string"]))}"'
        return f"{fmt_addr(int(entry['from']))}->{target}"

    def collect_key_constants(self, pdfj: dict[str, object]) -> list[str]:
        """Pick up to 10 distinctive integer operands from a pdfj listing."""
        values: list[int] = []
        seen: set[int] = set()
        for op in pdfj.get("ops", []):
            for key in ("val", "ptr"):
                raw = op.get(key)
                if not isinstance(raw, int):
                    continue
                # Keep mid-sized constants only; tiny ones are noise.
                if raw < 0x10 or raw > 0x100000:
                    continue
                # NOTE(review): this image-range exclusion is unreachable —
                # raw > 0x100000 is already rejected above, and
                # 0x00400000 > 0x100000.  Kept verbatim pending confirmation
                # of the intended upper bound.
                if 0x00400000 <= raw <= 0x01000000:
                    continue
                if raw in seen:
                    continue
                seen.add(raw)
                values.append(raw)
        return [fmt_addr(value) for value in values[:10]]

    def collect_key_strings(self, data_refs: list[dict[str, object]]) -> list[str]:
        """Pick up to 8 unique referenced strings, escaped for display."""
        strings: list[str] = []
        seen: set[str] = set()
        for entry in data_refs:
            string_entry = entry["string"]
            if string_entry is None:
                continue
            text = display_string(str(string_entry["string"]))
            if text in seen:
                continue
            seen.add(text)
            strings.append(text)
        return strings[:8]

    def discover_adjacent_functions(self, addresses: list[int]) -> list[int]:
        """Expand *addresses* with callers/callees inside the adjacency window."""
        discovered = set(addresses)
        for address in addresses:
            function = self.resolve_target_function(address)
            if function is None:
                continue
            for entry in self.format_callers(function):
                caller = entry["function"]
                if caller is None:
                    continue
                caller_start = int(caller["offset"])
                if PENDING_TEMPLATE_STORE_ADJACENT_MIN <= caller_start < PENDING_TEMPLATE_STORE_ADJACENT_MAX:
                    discovered.add(caller_start)
            for entry in self.format_callees(function):
                callee = entry["function"]
                callee_start = int(callee["offset"])
                if PENDING_TEMPLATE_STORE_ADJACENT_MIN <= callee_start < PENDING_TEMPLATE_STORE_ADJACENT_MAX:
                    discovered.add(callee_start)
        return sorted(discovered)

    def build_function_rows(self, addresses: list[int]) -> list[dict[str, str]]:
        """Build one CSV row per seed/adjacent function with all annotations."""
        rows: list[dict[str, str]] = []
        for query_address in self.discover_adjacent_functions(addresses):
            function = self.resolve_target_function(query_address)
            if function is None:
                continue
            callers = self.format_callers(function)
            callees = self.format_callees(function)
            data_refs = self.format_data_refs(function)
            pdfj = self.function_pdfj(int(function["offset"]))
            rows.append(
                {
                    "query_address": fmt_addr(query_address),
                    "function_address": fmt_addr(int(function["offset"])),
                    "name": self.function_index.preferred_name(function),
                    "size": str(function["size"]),
                    "calling_convention": str(function.get("calltype", "unknown")),
                    "caller_count": str(len(callers)),
                    "callers": "; ".join(
                        self.describe_caller(entry["call_site"], entry["function"])
                        for entry in callers
                    ),
                    "callee_count": str(len(callees)),
                    "callees": "; ".join(
                        self.describe_callee(entry["call_site"], entry["function"])
                        for entry in callees
                    ),
                    "data_ref_count": str(len(data_refs)),
                    "data_refs": "; ".join(self.describe_data_ref(entry) for entry in data_refs),
                    "key_constants": "; ".join(self.collect_key_constants(pdfj)),
                    "key_strings": "; ".join(self.collect_key_strings(data_refs)),
                    "entry_excerpt": self.excerpt(int(function["offset"])).replace("\n", " | "),
                }
            )
        return rows

    def _case_groups(self, switch_address: int) -> list[dict[str, object]]:
        """Group pdfj ops of the switch function by their ``case.*`` flags.

        Each group starts at an op carrying one or more case flags and runs
        until the next flagged op; ops before the first flag are dropped.
        """
        pdfj = self.function_pdfj(switch_address)
        groups: list[dict[str, object]] = []
        current: dict[str, object] | None = None
        case_pattern = re.compile(r"^case\.0x[0-9a-f]+\.(\d+)$")
        default_pattern = re.compile(r"^case\.default\.0x[0-9a-f]+$")
        for op in pdfj.get("ops", []):
            labels: list[str] = []
            for flag in op.get("flags", []):
                match = case_pattern.match(flag)
                if match:
                    labels.append(match.group(1))
                    continue
                if default_pattern.match(flag):
                    labels.append("default")
            if labels:
                current = {
                    "start": int(op["offset"]),
                    "cases": labels,
                    "ops": [op],
                }
                groups.append(current)
                continue
            if current is not None:
                current["ops"].append(op)
        return groups

    def _infer_case_shape(self, ops: list[dict[str, object]]) -> tuple[str, str, str]:
        """Heuristically classify one switch case's payload teardown.

        Returns ``(shape, freed_fields, notes)`` where *notes* combines the
        heap-free count with a short disassembly excerpt.
        """
        disasm_lines = [str(op["disasm"]) for op in ops]
        text = "\n".join(disasm_lines)
        offset_pattern = re.compile(r"\[(?:edi|eax|ecx)\+0x([0-9a-f]+)\]")
        direct_offsets = {
            int(match.group(1), 16) for match in offset_pattern.finditer(text)
        }
        # BUGFIX: the original filtered with ``"*4" in text`` — a check on
        # the *whole* case text, which made indexed_offsets either empty or
        # identical to direct_offsets.  Restrict to offsets appearing on
        # lines that actually use scaled (*4) indexing.
        indexed_offsets = {
            int(match.group(1), 16)
            for line in disasm_lines
            if "*4" in line
            for match in offset_pattern.finditer(line)
        }
        free_calls = sum(
            1
            for op in ops
            if op.get("jump") == HEAP_FREE_ADDR
            or "fcn.0058f3c0" in str(op.get("disasm", ""))
        )
        has_loop = any("*4" in line for line in disasm_lines)
        fields = ["top-level payload"]
        for offset in sorted(direct_offsets):
            fields.append(f"payload+0x{offset:02x}")
        if has_loop:
            for offset in sorted(indexed_offsets):
                fields.append(f"vector@payload+0x{offset:02x}")
        unique_fields = []
        seen_fields: set[str] = set()
        for field in fields:
            if field in seen_fields:
                continue
            seen_fields.add(field)
            unique_fields.append(field)
        if has_loop and len(direct_offsets) >= 3:
            shape = "pointer vectors with paired side tables"
        elif has_loop:
            shape = "indexed pointer vector"
        elif len(direct_offsets) >= 4:
            shape = "fixed pointer tuple"
        elif len(direct_offsets) >= 2:
            shape = "paired nested pointers"
        elif free_calls <= 1:
            shape = "top-level payload pointer"
        else:
            shape = "single nested pointer"
        cleanup_summary = f"{free_calls} heap free call(s)"
        excerpt = " | ".join(disasm_lines[:8])
        return shape, ", ".join(unique_fields), cleanup_summary + f"; {excerpt}"

    def build_record_kind_rows(self) -> list[dict[str, str]]:
        """Build one CSV row per destructor-switch case, numeric cases first."""
        rows: list[dict[str, str]] = []
        for group in self._case_groups(DESTRUCTOR_SWITCH_ADDR):
            shape, freed_fields, notes = self._infer_case_shape(group["ops"])
            case_group = ",".join(group["cases"])
            for case_label in group["cases"]:
                rows.append(
                    {
                        "record_kind": case_label,
                        "case_group": case_group,
                        "owning_function": fmt_addr(DESTRUCTOR_SWITCH_ADDR),
                        "owning_name": self.curated_names.get(
                            DESTRUCTOR_SWITCH_ADDR,
                            "multiplayer_transport_destroy_pending_template_dispatch_record",
                        ),
                        "inferred_payload_shape": shape,
                        "freed_fields": freed_fields,
                        "notes": notes,
                    }
                )
        rows.sort(
            key=lambda row: (
                row["record_kind"] == "default",
                int(row["record_kind"]) if row["record_kind"] != "default" else 0,
            )
        )
        return rows

    def write_csv(self, path: Path, rows: list[dict[str, str]]) -> None:
        """Write *rows* to *path*; no file is written for an empty row list."""
        if not rows:
            return
        with path.open("w", newline="", encoding="utf-8") as handle:
            writer = csv.DictWriter(handle, fieldnames=list(rows[0].keys()))
            writer.writeheader()
            writer.writerows(rows)

    def write_pending_template_store_markdown(self, function_rows: list[dict[str, str]]) -> None:
        """Write the grouped markdown summary of the store management helpers."""
        by_address = {parse_hex(row["function_address"]): row for row in function_rows}
        # Section membership is curated by hand; addresses absent from
        # function_rows are silently skipped below.
        sections = {
            "Init": [0x0059B710, 0x0059C5B0],
            "Destroy": [0x0059B2E0, 0x0059B740, 0x0059C5E0],
            "Lookup": [0x0059C540, 0x0059C590],
            "Prune / Remove": [0x0059C470],
            "Dispatch / Update": [0x0059C220, 0x0059C5F0],
        }
        lines = [
            "# Pending-Template Store Management",
            "",
            f"- Target binary: `{self.exe_path}`",
            "- Scope: companion pending-template dispatch store and its adjacent management helpers.",
            "",
        ]
        for title, addresses in sections.items():
            lines.extend([f"## {title}", ""])
            for address in addresses:
                row = by_address.get(address)
                if row is None:
                    continue
                lines.append(f"### `{row['function_address']}` `{row['name']}`")
                lines.append("")
                lines.append(f"- Size: `{row['size']}`")
                lines.append(f"- Calling convention: `{row['calling_convention']}`")
                lines.append(f"- Callers: {row['callers'] or 'none'}")
                lines.append(f"- Direct callees: {row['callees'] or 'none'}")
                lines.append(f"- Data refs: {row['data_refs'] or 'none'}")
                lines.append(f"- Key constants: {row['key_constants'] or 'none'}")
                lines.append(f"- Key strings: {row['key_strings'] or 'none'}")
                lines.append("")
                lines.append("Entry excerpt:")
                lines.append("")
                lines.append("```asm")
                lines.append(self.excerpt(address))
                lines.append("```")
                lines.append("")
        (self.output_dir / "pending-template-store-management.md").write_text(
            "\n".join(lines) + "\n",
            encoding="utf-8",
        )


def export_pending_template_store(
    exe_path: Path,
    output_dir: Path,
    seed_addresses: list[int],
) -> None:
    """Run the full export: function CSV, record-kind CSV, and markdown."""
    analyzer = BranchAnalyzer(exe_path, output_dir)
    function_rows = analyzer.build_function_rows(seed_addresses)
    record_rows = analyzer.build_record_kind_rows()
    analyzer.write_csv(output_dir / "pending-template-store-functions.csv", function_rows)
    analyzer.write_csv(output_dir / "pending-template-store-record-kinds.csv", record_rows)
    analyzer.write_pending_template_store_markdown(function_rows)