Build RE baseline and initial Rust workspace
This commit is contained in:
parent
8d1f280e2e
commit
ffaf155ef0
39 changed files with 5974 additions and 8 deletions
386
tools/ghidra/scripts/ExportStartupFunctions.java
Normal file
386
tools/ghidra/scripts/ExportStartupFunctions.java
Normal file
|
|
@@ -0,0 +1,386 @@
|
|||
// Export startup-oriented function metadata from the current Ghidra program.
|
||||
//@category RT3
|
||||
|
||||
import ghidra.app.script.GhidraScript;
|
||||
import ghidra.program.model.address.Address;
|
||||
import ghidra.program.model.listing.Function;
|
||||
import ghidra.program.model.listing.Instruction;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayDeque;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
public class ExportStartupFunctions extends GhidraScript {
|
||||
|
||||
/** Immutable BFS work item: a function plus the edge through which it was reached. */
private static class QueueEntry {
    final Function function;
    // Hop count from the root function (root itself is depth 0).
    final int depth;
    // "" for the root entry; otherwise the caller's entry-point address string.
    final String parentAddress;
    // "" for the root entry; otherwise the address of the call instruction.
    final String callSite;

    QueueEntry(Function function, int depth, String parentAddress, String callSite) {
        this.function = function;
        this.depth = depth;
        this.parentAddress = parentAddress;
        this.callSite = callSite;
    }
}
|
||||
|
||||
/** A named analysis root: a label plus a hex entry-point address string. */
private static class RootSpec {
    final String name;
    final String address;

    RootSpec(String name, String address) {
        this.name = name;
        this.address = address;
    }
}
|
||||
|
||||
/** One CSV output row describing a function discovered during the BFS walk. */
private static class FunctionRow {
    String rootName;           // which root spec produced this row
    String rootAddress;
    String address;            // function entry point, "0x"-prefixed
    int depth;                 // BFS depth from the root
    String name;
    long sizeBytes;            // address-set size of the function body
    String callingConvention;  // "unknown" when Ghidra reports none
    String signatureSource;    // e.g. DEFAULT / USER_DEFINED; "" on failure
    String signature;          // rendered prototype; "" on failure
    String parentAddress;      // "" for the root function
    String callSite;           // "" for the root function
}
|
||||
|
||||
/** One discovered call edge (parent -> child) within a given root's walk. */
private static class EdgeRow {
    String rootName;
    String rootAddress;
    String parentAddress;  // caller entry point
    String childAddress;   // callee entry point
    String callSite;       // address of the call instruction
}
|
||||
|
||||
/** A single call instruction's resolved internal callee. */
private static class CallTarget {
    final String callSite;  // address of the call instruction, "0x"-prefixed
    final Function callee;

    CallTarget(String callSite, Function callee) {
        this.callSite = callSite;
        this.callee = callee;
    }
}
|
||||
|
||||
/**
 * Script entry point. Usage: {@code <output-dir> [max-depth] [root-name:address ...]}.
 * Walks each root breadth-first (default depth 2, default root = known 1.06
 * entrypoint), then writes a CSV of functions and a markdown call-chain report.
 */
@Override
protected void run() throws Exception {
    String[] args = getScriptArgs();
    if (args.length < 1) {
        throw new RuntimeException(
            "usage: ExportStartupFunctions.java <output-dir> [max-depth] [root-name:address ...]");
    }

    String outputDir = args[0];
    // Second argument, when present, bounds the BFS depth; defaults to 2.
    int maxDepth = args.length > 1 ? Integer.parseInt(args[1]) : 2;
    List<RootSpec> roots = parseRoots(args);

    File output = new File(outputDir);
    output.mkdirs();

    List<FunctionRow> rows = new ArrayList<FunctionRow>();
    List<EdgeRow> edges = new ArrayList<EdgeRow>();

    for (RootSpec root : roots) {
        collectRoot(rows, edges, root, maxDepth);
    }

    // Deterministic output ordering so repeated runs diff cleanly.
    Collections.sort(rows, Comparator
        .comparing((FunctionRow row) -> row.rootName)
        .thenComparing(row -> row.rootAddress)
        .thenComparingInt(row -> row.depth)
        .thenComparing(row -> row.address));
    Collections.sort(edges, Comparator
        .comparing((EdgeRow row) -> row.rootName)
        .thenComparing(row -> row.rootAddress)
        .thenComparing(row -> row.parentAddress)
        .thenComparing(row -> row.callSite)
        .thenComparing(row -> row.childAddress));

    writeCsv(new File(output, "ghidra-startup-functions.csv"), rows);
    writeMarkdown(new File(output, "startup-call-chain.md"), rows, edges, roots, maxDepth);
}
|
||||
|
||||
/**
 * Breadth-first walk from one root: appends a FunctionRow per unique function
 * reached within {@code maxDepth} hops and an EdgeRow per discovered call edge.
 *
 * @throws IllegalArgumentException if no internal function exists at the root address
 */
private void collectRoot(
        List<FunctionRow> rows,
        List<EdgeRow> edges,
        RootSpec root,
        int maxDepth) {
    Function rootFunction = getInternalFunction(toAddr(root.address));
    if (rootFunction == null) {
        throw new IllegalArgumentException("no function found at " + root.address);
    }

    ArrayDeque<QueueEntry> queue = new ArrayDeque<QueueEntry>();
    Set<String> seen = new HashSet<String>();
    queue.add(new QueueEntry(rootFunction, 0, "", ""));

    while (!queue.isEmpty()) {
        QueueEntry item = queue.removeFirst();
        Function function = item.function;
        String address = asAddress(function.getEntryPoint());
        // A function reachable via several paths is recorded once; BFS order
        // guarantees the recorded depth is the shallowest.
        if (seen.contains(address)) {
            continue;
        }
        seen.add(address);

        FunctionRow row = new FunctionRow();
        row.rootName = root.name;
        row.rootAddress = root.address;
        row.address = address;
        row.depth = item.depth;
        row.name = function.getName();
        row.sizeBytes = function.getBody().getNumAddresses();
        row.callingConvention = safeString(function.getCallingConventionName(), "unknown");
        row.signatureSource = safeSignatureSource(function);
        row.signature = safeSignature(function);
        row.parentAddress = item.parentAddress;
        row.callSite = item.callSite;
        rows.add(row);

        // The root has no parent, hence no incoming edge row.
        if (!item.parentAddress.isEmpty()) {
            EdgeRow edge = new EdgeRow();
            edge.rootName = root.name;
            edge.rootAddress = root.address;
            edge.parentAddress = item.parentAddress;
            edge.childAddress = address;
            edge.callSite = item.callSite;
            edges.add(edge);
        }

        // Stop expanding children once the depth budget is spent.
        if (item.depth >= maxDepth) {
            continue;
        }

        for (CallTarget callTarget : iterInternalCalls(function)) {
            queue.add(new QueueEntry(
                callTarget.callee,
                item.depth + 1,
                address,
                callTarget.callSite));
        }
    }
}
|
||||
|
||||
private List<RootSpec> parseRoots(String[] args) {
|
||||
List<RootSpec> roots = new ArrayList<RootSpec>();
|
||||
if (args.length <= 2) {
|
||||
roots.add(new RootSpec("entry", "0x005a313b"));
|
||||
return roots;
|
||||
}
|
||||
for (int index = 2; index < args.length; index++) {
|
||||
String token = args[index];
|
||||
int separator = token.indexOf(':');
|
||||
if (separator <= 0 || separator == token.length() - 1) {
|
||||
throw new IllegalArgumentException("root spec must be name:address: " + token);
|
||||
}
|
||||
roots.add(new RootSpec(token.substring(0, separator), token.substring(separator + 1)));
|
||||
}
|
||||
return roots;
|
||||
}
|
||||
|
||||
/**
 * Resolves the internal (non-imported) function at or containing {@code address}.
 * Returns null for external/imported functions or when nothing is defined there.
 */
private Function getInternalFunction(Address address) {
    Function function = getFunctionAt(address);
    if (function == null) {
        // The address may point into a function body rather than its entry.
        function = getFunctionContaining(address);
    }
    // Imported API thunks are deliberately excluded from the walk.
    if (function != null && function.isExternal()) {
        return null;
    }
    return function;
}
|
||||
|
||||
/**
 * Lists the unique internal call targets found inside {@code function}'s body,
 * honoring the script monitor's cancel flag.
 */
private List<CallTarget> iterInternalCalls(Function function) {
    List<CallTarget> result = new ArrayList<CallTarget>();
    Set<String> seenCalls = new HashSet<String>();
    var instructions = currentProgram.getListing().getInstructions(function.getBody(), true);
    while (instructions.hasNext() && !monitor.isCancelled()) {
        Instruction instruction = instructions.next();
        if (!instruction.getFlowType().isCall()) {
            continue;
        }
        // A call instruction can have multiple flow targets (e.g. computed calls).
        Address[] flows = instruction.getFlows();
        for (Address flow : flows) {
            Function callee = getInternalFunction(flow);
            if (callee == null) {
                continue;
            }
            // De-duplicate identical call-site -> callee pairs.
            String key = asAddress(instruction.getAddress()) + "->" + asAddress(callee.getEntryPoint());
            if (seenCalls.contains(key)) {
                continue;
            }
            seenCalls.add(key);
            result.add(new CallTarget(asAddress(instruction.getAddress()), callee));
        }
    }
    return result;
}
|
||||
|
||||
private void writeCsv(File path, List<FunctionRow> rows) throws IOException {
|
||||
BufferedWriter writer = new BufferedWriter(new FileWriter(path));
|
||||
try {
|
||||
writer.write(
|
||||
"root_name,root_address,address,depth,name,size_bytes,calling_convention,signature_source,signature,parent_address,call_site\n");
|
||||
for (FunctionRow row : rows) {
|
||||
writer.write(csv(row.rootName));
|
||||
writer.write(",");
|
||||
writer.write(csv(row.rootAddress));
|
||||
writer.write(",");
|
||||
writer.write(csv(row.address));
|
||||
writer.write(",");
|
||||
writer.write(csv(Integer.toString(row.depth)));
|
||||
writer.write(",");
|
||||
writer.write(csv(row.name));
|
||||
writer.write(",");
|
||||
writer.write(csv(Long.toString(row.sizeBytes)));
|
||||
writer.write(",");
|
||||
writer.write(csv(row.callingConvention));
|
||||
writer.write(",");
|
||||
writer.write(csv(row.signatureSource));
|
||||
writer.write(",");
|
||||
writer.write(csv(row.signature));
|
||||
writer.write(",");
|
||||
writer.write(csv(row.parentAddress));
|
||||
writer.write(",");
|
||||
writer.write(csv(row.callSite));
|
||||
writer.write("\n");
|
||||
}
|
||||
}
|
||||
finally {
|
||||
writer.close();
|
||||
}
|
||||
}
|
||||
|
||||
private void writeMarkdown(
|
||||
File path,
|
||||
List<FunctionRow> rows,
|
||||
List<EdgeRow> edges,
|
||||
List<RootSpec> roots,
|
||||
int maxDepth) throws IOException {
|
||||
|
||||
Map<String, FunctionRow> byAddress = new HashMap<String, FunctionRow>();
|
||||
Map<String, List<EdgeRow>> children = new HashMap<String, List<EdgeRow>>();
|
||||
for (FunctionRow row : rows) {
|
||||
byAddress.put(keyedAddress(row.rootName, row.rootAddress, row.address), row);
|
||||
}
|
||||
for (EdgeRow edge : edges) {
|
||||
children.computeIfAbsent(
|
||||
keyedAddress(edge.rootName, edge.rootAddress, edge.parentAddress),
|
||||
ignored -> new ArrayList<EdgeRow>()).add(edge);
|
||||
}
|
||||
for (List<EdgeRow> childList : children.values()) {
|
||||
Collections.sort(childList, Comparator.comparing(edge -> edge.childAddress));
|
||||
}
|
||||
|
||||
BufferedWriter writer = new BufferedWriter(new FileWriter(path));
|
||||
try {
|
||||
writer.write("# Startup Call Chain\n\n");
|
||||
writer.write("- Depth limit: `" + maxDepth + "`\n");
|
||||
writer.write("- Internal call targets only; imported APIs are intentionally excluded.\n\n");
|
||||
for (RootSpec root : roots) {
|
||||
FunctionRow row = byAddress.get(keyedAddress(root.name, root.address, root.address));
|
||||
if (row == null) {
|
||||
continue;
|
||||
}
|
||||
writer.write("## `" + root.name + "` root `" + row.address + "` `" + row.name + "`\n\n");
|
||||
writer.write("- `" + row.address + "` `" + row.name + "`\n");
|
||||
HashSet<String> visited = new HashSet<String>();
|
||||
visited.add(keyedAddress(root.name, root.address, row.address));
|
||||
emitChildren(writer, root.name, root.address, row.address, 1, children, byAddress, visited);
|
||||
writer.write("\n");
|
||||
}
|
||||
}
|
||||
finally {
|
||||
writer.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Recursively emits one markdown bullet per unvisited child edge of
 * {@code address}, indenting one space per depth level.
 *
 * @param visited shared per-root set used to break call-graph cycles
 */
private void emitChildren(
        BufferedWriter writer,
        String rootName,
        String rootAddress,
        String address,
        int indent,
        Map<String, List<EdgeRow>> children,
        Map<String, FunctionRow> byAddress,
        Set<String> visited) throws IOException {
    List<EdgeRow> childRows = children.get(keyedAddress(rootName, rootAddress, address));
    if (childRows == null) {
        // Leaf function: no outgoing internal edges recorded.
        return;
    }
    for (EdgeRow edge : childRows) {
        String childKey = keyedAddress(edge.rootName, edge.rootAddress, edge.childAddress);
        FunctionRow child = byAddress.get(childKey);
        // Skip children without rows and already-visited nodes (cycle guard).
        if (child == null || visited.contains(childKey)) {
            continue;
        }
        visited.add(childKey);
        writer.write(" ".repeat(indent));
        writer.write("- `" + child.address + "` `" + child.name + "` via `" + edge.callSite + "`\n");
        emitChildren(
            writer,
            edge.rootName,
            edge.rootAddress,
            child.address,
            indent + 1,
            children,
            byAddress,
            visited);
    }
}
|
||||
|
||||
private String keyedAddress(String rootName, String rootAddress, String address) {
|
||||
return rootName + "|" + rootAddress + "|" + address;
|
||||
}
|
||||
|
||||
/** Renders the function's full prototype string, or "" if Ghidra fails to render it. */
private String safeSignature(Function function) {
    try {
        return function.getPrototypeString(true, false);
    }
    catch (Exception ignored) {
        // Prototype rendering can throw on malformed metadata; treat as absent.
        return "";
    }
}
|
||||
|
||||
/** Returns the signature source (e.g. DEFAULT, USER_DEFINED), or "" on failure. */
private String safeSignatureSource(Function function) {
    try {
        return function.getSignatureSource().toString();
    }
    catch (Exception ignored) {
        // Treat any lookup failure as "no signature source recorded".
        return "";
    }
}
|
||||
|
||||
/** Formats a Ghidra address as a "0x"-prefixed string for CSV/markdown output. */
private String asAddress(Address address) {
    return "0x" + address.toString();
}
|
||||
|
||||
private String safeString(String value, String fallback) {
|
||||
if (value == null || value.isEmpty()) {
|
||||
return fallback;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
private String csv(String value) {
|
||||
if (value == null) {
|
||||
value = "";
|
||||
}
|
||||
return "\"" + value.replace("\"", "\"\"") + "\"";
|
||||
}
|
||||
}
|
||||
314
tools/py/collect_pe_artifacts.py
Normal file
314
tools/py/collect_pe_artifacts.py
Normal file
|
|
@@ -0,0 +1,314 @@
|
|||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import hashlib
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# PE optional-header fields retained from `llvm-objdump -p` output.
SUMMARY_KEYS = {
    "Time/Date",
    "Magic",
    "AddressOfEntryPoint",
    "ImageBase",
    "Subsystem",
    "SizeOfImage",
    "SizeOfCode",
    "SizeOfInitializedData",
    "BaseOfCode",
    "BaseOfData",
}

# Case-insensitive substrings marking a binary string as "interesting" for triage.
KEYWORDS = (
    "rail",
    "cargo",
    "map",
    "save",
    "scenario",
    "debug",
    "bink",
    "mss",
    "direct",
    "sound",
    "video",
    "d3d",
)

# Per-subsystem evidence hints: imported DLL names and string markers whose
# presence suggests the subsystem exists in the binary. Initial hypotheses only.
SUBSYSTEM_HINTS = {
    "startup": {
        "dlls": {"KERNEL32.dll", "ADVAPI32.dll"},
        "strings": ("debug", "OutputDebugStringA"),
    },
    "ui": {
        "dlls": {"USER32.dll", "comdlg32.dll", "GDI32.dll"},
        "strings": ("MessageBoxA", "CreateWindowExA"),
    },
    "render": {
        "dlls": {"d3d8.dll"},
        "strings": ("Direct3D", "Hardware T & L", "Video.win"),
    },
    "audio": {
        "dlls": {"mss32.dll", "DSOUND.dll"},
        "strings": ("DirectSound", "PaintSound.win", "Data\\Sound"),
    },
    "input": {
        "dlls": {"DINPUT8.dll"},
        "strings": ("DirectInput8Create",),
    },
    "network": {
        "dlls": {"WS2_32.dll", "WSOCK32.dll"},
        "strings": ("Direct Play", "HOST version"),
    },
    "filesystem": {
        "dlls": {"KERNEL32.dll"},
        "strings": ("Saved Games", ".\\Maps\\", "MapViewOfFile"),
    },
    "resource": {
        "dlls": {"VERSION.dll", "ole32.dll"},
        "strings": ("CargoIcons", "CargoModels", ".imb"),
    },
    "map": {
        "dlls": set(),
        "strings": ("gptGameMap", "maps\\*.gmp", "Map Description"),
    },
    "scenario": {
        "dlls": set(),
        "strings": ("Scenario Text File", "Campaign Scenario"),
    },
    "save": {
        "dlls": set(),
        "strings": ("Quicksave", "Save Game:", "Saved Games"),
    },
}
|
||||
|
||||
|
||||
def run_command(*args: str) -> str:
    """Run *args* as a subprocess and return its captured stdout as text.

    Raises subprocess.CalledProcessError on a non-zero exit status.
    """
    completed = subprocess.run(args, check=True, text=True, capture_output=True)
    return completed.stdout
|
||||
|
||||
|
||||
def sha256(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at *path*, read in 1 MiB chunks."""
    hasher = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            chunk = stream.read(1024 * 1024)
            if not chunk:
                break
            hasher.update(chunk)
    return hasher.hexdigest()
|
||||
|
||||
|
||||
def parse_summary(text: str) -> dict[str, str]:
    """Extract the PE header fields named in SUMMARY_KEYS from `llvm-objdump -p` output."""
    pattern = re.compile(r"([A-Za-z/]+)\s+(.+)")
    summary: dict[str, str] = {}
    for raw in text.splitlines():
        candidate = raw.strip()
        if not candidate:
            continue
        match = pattern.match(candidate)
        if match is None:
            continue
        key, value = match.group(1), match.group(2)
        # Keep only the whitelisted header fields.
        if key in SUMMARY_KEYS:
            summary[key] = value.strip()
    return summary
|
||||
|
||||
|
||||
def parse_sections(text: str) -> list[dict[str, str]]:
    """Parse `objdump -h` section-table output into normalized dicts.

    Section flags ("CONTENTS, ALLOC, ...") appear on the line after each
    numeric entry, so that follow-up line is captured as "flags".
    """
    row_pattern = re.compile(
        r"\s*(\d+)\s+(\S+)\s+([0-9a-fA-F]+)\s+([0-9a-fA-F]+)\s+([0-9a-fA-F]+)\s+([0-9a-fA-F]+)"
    )
    all_lines = text.splitlines()
    parsed: list[dict[str, str]] = []
    for position, current in enumerate(all_lines):
        match = row_pattern.match(current)
        if match is None:
            continue
        idx, name, size, vma, lma, file_off = match.groups()
        has_next = position + 1 < len(all_lines)
        flags = all_lines[position + 1].strip() if has_next else ""
        parsed.append(
            {
                "idx": idx,
                "name": name,
                "size_hex": f"0x{size.lower()}",
                "vma": f"0x{vma.lower()}",
                "lma": f"0x{lma.lower()}",
                "file_offset": f"0x{file_off.lower()}",
                "flags": flags,
            }
        )
    return parsed
|
||||
|
||||
|
||||
def parse_imports(text: str) -> tuple[list[str], list[dict[str, str]]]:
    """Parse an import-table dump into (DLL names in order, imported functions).

    A "DLL Name:" line starts a new DLL; a "Hint/Ord ... Name" header arms
    function parsing; a blank line disarms it.
    """
    dll_names: list[str] = []
    imported: list[dict[str, str]] = []
    active_dll = ""
    reading_functions = False
    for raw in text.splitlines():
        line = raw.rstrip()
        dll = re.match(r"\s*DLL Name:\s+(.+)", line)
        if dll:
            active_dll = dll.group(1).strip()
            dll_names.append(active_dll)
            reading_functions = False
            continue
        if "Hint/Ord" in line and "Name" in line:
            reading_functions = True
            continue
        if not reading_functions or not active_dll:
            continue
        entry = re.match(r"\s*([0-9]+)\s+(.+)", line)
        if entry:
            hint, fn_name = entry.groups()
            imported.append({"dll": active_dll, "hint": hint, "name": fn_name.strip()})
        elif not line.strip():
            # Blank line ends the current DLL's function list.
            reading_functions = False
    return dll_names, imported
|
||||
|
||||
|
||||
def interesting_strings(text: str) -> list[str]:
    """Return stripped, de-duplicated lines containing any KEYWORDS substring.

    Matching is case-insensitive; first-seen order is preserved.
    """
    hits: list[str] = []
    seen: set[str] = set()
    for line in text.splitlines():
        lowered = line.lower()
        if not any(keyword in lowered for keyword in KEYWORDS):
            continue
        stripped = line.strip()
        if stripped and stripped not in seen:
            seen.add(stripped)
            hits.append(stripped)
    return hits
|
||||
|
||||
|
||||
def build_subsystem_inventory(dlls: list[str], strings_found: list[str]) -> str:
    """Render a markdown inventory of subsystems suggested by DLL/string evidence.

    Only subsystems with at least one matched DLL or string get a section; a
    trailing "unknown" section counts broad strings still needing triage.
    """
    present_dlls = set(dlls)
    lower_strings = [entry.lower() for entry in strings_found]
    lines = ["# Starter Subsystem Inventory", ""]
    for name, hints in SUBSYSTEM_HINTS.items():
        matched_dlls = sorted(present_dlls.intersection(hints["dlls"]))
        matched_strings = [
            entry
            for entry in strings_found
            if any(marker.lower() in entry.lower() for marker in hints["strings"])
        ]
        if not matched_dlls and not matched_strings:
            continue
        evidence = []
        if matched_dlls:
            evidence.append("DLLs: " + ", ".join(matched_dlls))
        if matched_strings:
            # Cap at four examples to keep the report readable.
            evidence.append("strings: " + "; ".join(matched_strings[:4]))
        lines.extend(
            [
                f"## {name}",
                "",
                "- Evidence: " + " | ".join(evidence),
                "- Status: initial hypothesis only",
                "",
            ]
        )
    unknown_count = sum(1 for entry in lower_strings if "debug" in entry or "map" in entry)
    lines.extend(
        [
            "## unknown",
            "",
            f"- Evidence: {unknown_count} broad strings still need manual triage",
            "- Status: expected until GUI analysis identifies real call sites",
            "",
        ]
    )
    return "\n".join(lines)
|
||||
|
||||
|
||||
def write_csv(path: Path, fieldnames: list[str], rows: list[dict[str, str]]) -> None:
    """Write *rows* to *path* as UTF-8 CSV with a header row."""
    with path.open("w", newline="", encoding="utf-8") as handle:
        out = csv.DictWriter(handle, fieldnames=fieldnames)
        out.writeheader()
        for row in rows:
            out.writerow(row)
|
||||
|
||||
|
||||
def main() -> int:
    """Collect static PE artifacts for <exe-path> into <output-dir>.

    Shells out to `file`, `objdump`, `llvm-objdump`, and `strings`, then writes
    JSON/CSV/markdown artifacts. Returns a process exit code (2 on bad usage).
    """
    if len(sys.argv) != 3:
        print("usage: collect_pe_artifacts.py <exe-path> <output-dir>", file=sys.stderr)
        return 2

    exe_path = Path(sys.argv[1]).resolve()
    out_dir = Path(sys.argv[2]).resolve()
    out_dir.mkdir(parents=True, exist_ok=True)

    # External tool passes; run_command raises CalledProcessError on failure.
    file_output = run_command("file", str(exe_path)).strip()
    section_output = run_command("objdump", "-h", str(exe_path))
    pe_output = run_command("llvm-objdump", "-p", str(exe_path))
    strings_output = run_command("strings", "-n", "4", str(exe_path))

    summary = parse_summary(pe_output)
    sections = parse_sections(section_output)
    dlls, functions = parse_imports(pe_output)
    strings_found = interesting_strings(strings_output)

    summary_payload = {
        "path": str(exe_path),
        "sha256": sha256(exe_path),
        "size_bytes": exe_path.stat().st_size,
        "file": file_output,
        "summary": summary,
        "imported_dll_count": len(dlls),
        "imported_function_count": len(functions),
    }

    (out_dir / "binary-summary.json").write_text(
        json.dumps(summary_payload, indent=2, sort_keys=True) + "\n",
        encoding="utf-8",
    )
    write_csv(
        out_dir / "sections.csv",
        ["idx", "name", "size_hex", "vma", "lma", "file_offset", "flags"],
        sections,
    )
    (out_dir / "imported-dlls.txt").write_text("\n".join(dlls) + "\n", encoding="utf-8")
    write_csv(out_dir / "imported-functions.csv", ["dll", "hint", "name"], functions)
    (out_dir / "interesting-strings.txt").write_text(
        "\n".join(strings_found) + "\n",
        encoding="utf-8",
    )
    (out_dir / "subsystem-inventory.md").write_text(
        build_subsystem_inventory(dlls, strings_found),
        encoding="utf-8",
    )

    # Entrypoint VA = header RVA + image base, when both fields were parsed.
    entry_rva = summary.get("AddressOfEntryPoint", "")
    image_base = summary.get("ImageBase", "")
    entry_va = ""
    if entry_rva and image_base:
        entry_va = hex(int(entry_rva, 16) + int(image_base, 16))
    # Seed the curated function map with a single low-confidence entrypoint row.
    write_csv(
        out_dir / "function-map.csv",
        [
            "address",
            "size",
            "name",
            "subsystem",
            "calling_convention",
            "prototype_status",
            "source_tool",
            "confidence",
            "notes",
            "verified_against",
        ],
        [
            {
                "address": entry_va,
                "size": "",
                "name": "entrypoint_1_06",
                "subsystem": "startup",
                "calling_convention": "unknown",
                "prototype_status": "unknown",
                "source_tool": "llvm-objdump",
                "confidence": "2",
                "notes": "Seed row from PE header entrypoint; function boundary still needs GUI confirmation.",
                "verified_against": f"sha256:{summary_payload['sha256']}",
            }
        ],
    )

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s return value to the shell as the exit status.
    raise SystemExit(main())
|
||||
522
tools/py/export_analysis_context.py
Normal file
522
tools/py/export_analysis_context.py
Normal file
|
|
@@ -0,0 +1,522 @@
|
|||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from bisect import bisect_right
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse CLI arguments; requires at least one --addr or --string target."""
    parser = argparse.ArgumentParser(
        description="Export reusable address and string context for focused RT3 analysis passes."
    )
    parser.add_argument("exe_path", type=Path)
    parser.add_argument("output_dir", type=Path)
    # action="append" lets each flag be repeated to accumulate targets.
    parser.add_argument(
        "--addr",
        action="append",
        default=[],
        help="Function or code address in hex. May be repeated.",
    )
    parser.add_argument(
        "--string",
        action="append",
        default=[],
        help="String text to resolve. May be repeated.",
    )
    args = parser.parse_args()
    if not args.addr and not args.string:
        parser.error("at least one --addr or --string target is required")
    return args
|
||||
|
||||
|
||||
def parse_hex(text: str) -> int:
    """Parse a hex string, accepting an optional 0x/0X prefix and surrounding whitespace."""
    token = text.strip().lower()
    return int(token[2:] if token.startswith("0x") else token, 16)
|
||||
|
||||
|
||||
def fmt_addr(value: int) -> str:
    """Format an address as a zero-padded 8-digit hex literal."""
    return "0x{:08x}".format(value)
|
||||
|
||||
|
||||
def display_string(text: str) -> str:
    """Escape control and non-ASCII characters for safe single-line display."""
    return str(text.encode("unicode_escape"), "ascii")
|
||||
|
||||
|
||||
def clean_json_payload(text: str) -> str:
    """Strip rizin's non-JSON preamble, returning from the first '[' or '{' onward.

    Raises ValueError when the output is empty or contains no JSON opener.
    """
    body = text.strip()
    if not body:
        raise ValueError("rizin returned empty output")
    # Take whichever JSON opener appears first; either may be absent.
    candidates = [pos for pos in (body.find("["), body.find("{")) if pos >= 0]
    if not candidates:
        raise ValueError("rizin did not return JSON")
    return body[min(candidates):]
|
||||
|
||||
|
||||
def run_rizin_json(exe_path: Path, command: str) -> object:
    """Run a rizin command in quiet batch mode and parse its JSON output.

    Color output is disabled so stdout stays machine-parseable; any banner
    noise before the JSON payload is removed by clean_json_payload.
    """
    result = subprocess.run(
        [
            "rizin",
            "-q",
            "-e",
            "scr.color=false",
            "-c",
            command,
            str(exe_path),
        ],
        check=True,
        capture_output=True,
        text=True,
    )
    return json.loads(clean_json_payload(result.stdout))
|
||||
|
||||
|
||||
def run_objdump_excerpt(exe_path: Path, address: int, radius: int = 0x20) -> str:
    """Disassemble a small window (+/- *radius* bytes) around *address*.

    Only instruction lines ("ADDR: ...") are kept; llvm-objdump headers and
    blank lines are filtered out.
    """
    start = max(address - radius, 0)
    stop = address + radius
    result = subprocess.run(
        [
            "llvm-objdump",
            "-d",
            "--no-show-raw-insn",
            f"--start-address={fmt_addr(start)}",
            f"--stop-address={fmt_addr(stop)}",
            str(exe_path),
        ],
        check=True,
        capture_output=True,
        text=True,
    )
    lines = [
        line.rstrip()
        for line in result.stdout.splitlines()
        if re.match(r"^\s*[0-9a-fA-F]+:", line)
    ]
    return "\n".join(lines)
|
||||
|
||||
|
||||
def load_curated_rows(path: Path) -> dict[int, dict[str, str]]:
    """Load the curated function-map CSV keyed by numeric address; {} if the file is absent."""
    if not path.exists():
        return {}
    curated: dict[int, dict[str, str]] = {}
    with path.open(newline="", encoding="utf-8") as handle:
        for row in csv.DictReader(handle):
            curated[parse_hex(row["address"])] = dict(row)
    return curated
|
||||
|
||||
|
||||
class FunctionIndex:
|
||||
def __init__(self, rows: list[dict[str, object]], curated_names: dict[int, str]):
|
||||
self.rows = sorted(rows, key=lambda row: int(row["offset"]))
|
||||
self.by_start = {int(row["offset"]): row for row in self.rows}
|
||||
self.starts = [int(row["offset"]) for row in self.rows]
|
||||
self.curated_names = curated_names
|
||||
|
||||
def get_exact(self, address: int) -> dict[str, object] | None:
|
||||
return self.by_start.get(address)
|
||||
|
||||
def find_containing(self, address: int) -> dict[str, object] | None:
|
||||
index = bisect_right(self.starts, address) - 1
|
||||
if index < 0:
|
||||
return None
|
||||
row = self.rows[index]
|
||||
start = int(row["offset"])
|
||||
end = int(row.get("maxbound", start + int(row.get("size", 0))))
|
||||
if start <= address < end:
|
||||
return row
|
||||
return None
|
||||
|
||||
def preferred_name(self, row: dict[str, object]) -> str:
|
||||
start = int(row["offset"])
|
||||
return self.curated_names.get(start, str(row["name"]))
|
||||
|
||||
|
||||
class ContextExporter:
|
||||
def __init__(self, exe_path: Path, output_dir: Path):
|
||||
self.exe_path = exe_path.resolve()
|
||||
self.output_dir = output_dir.resolve()
|
||||
self.output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
curated_map = self.output_dir / "function-map.csv"
|
||||
self.curated_rows = load_curated_rows(curated_map)
|
||||
self.curated_names = {
|
||||
address: row["name"] for address, row in self.curated_rows.items()
|
||||
}
|
||||
self.function_index = FunctionIndex(
|
||||
self.load_function_rows(),
|
||||
self.curated_names,
|
||||
)
|
||||
self.strings = list(run_rizin_json(self.exe_path, "izzj"))
|
||||
self.strings_by_addr = {int(entry["vaddr"]): entry for entry in self.strings}
|
||||
|
||||
def load_function_rows(self) -> list[dict[str, object]]:
|
||||
rows = list(run_rizin_json(self.exe_path, "aaa; aflj"))
|
||||
known_starts = {int(row["offset"]) for row in rows}
|
||||
missing_curated = sorted(address for address in self.curated_names if address not in known_starts)
|
||||
if not missing_curated:
|
||||
return rows
|
||||
|
||||
define_cmd = "aaa; " + "; ".join(
|
||||
f"af @ {fmt_addr(address)}" for address in missing_curated
|
||||
) + "; aflj"
|
||||
return list(run_rizin_json(self.exe_path, define_cmd))
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def xrefs_to(self, address: int) -> list[dict[str, object]]:
|
||||
return list(run_rizin_json(self.exe_path, f"aaa; axtj @ {fmt_addr(address)}"))
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def excerpt(self, address: int) -> str:
|
||||
return run_objdump_excerpt(self.exe_path, address)
|
||||
|
||||
def fallback_function(self, address: int) -> dict[str, object] | None:
|
||||
curated = self.curated_rows.get(address)
|
||||
if curated is None:
|
||||
return None
|
||||
|
||||
size = int(curated["size"])
|
||||
return {
|
||||
"offset": address,
|
||||
"name": curated["name"],
|
||||
"size": size,
|
||||
"maxbound": address + size,
|
||||
"calltype": curated["calling_convention"],
|
||||
"signature": "",
|
||||
"codexrefs": self.xrefs_to(address),
|
||||
"callrefs": [],
|
||||
"datarefs": [],
|
||||
"synthetic": True,
|
||||
}
|
||||
|
||||
def resolve_target_function(self, address: int) -> dict[str, object] | None:
|
||||
exact = self.function_index.get_exact(address)
|
||||
if exact is not None:
|
||||
return exact
|
||||
|
||||
fallback = self.fallback_function(address)
|
||||
if fallback is not None:
|
||||
return fallback
|
||||
|
||||
return self.function_index.find_containing(address)
|
||||
|
||||
def resolve_string_matches(self, query: str) -> list[tuple[str, dict[str, object]]]:
|
||||
exact = [entry for entry in self.strings if str(entry.get("string", "")) == query]
|
||||
if exact:
|
||||
return [("exact", entry) for entry in exact]
|
||||
partial = [entry for entry in self.strings if query in str(entry.get("string", ""))]
|
||||
return [("substring", entry) for entry in partial]
|
||||
|
||||
def format_callers(self, row: dict[str, object]) -> list[dict[str, object]]:
|
||||
callers: list[dict[str, object]] = []
|
||||
for ref in row.get("codexrefs", []):
|
||||
if ref.get("type") != "CALL":
|
||||
continue
|
||||
call_site = int(ref["from"])
|
||||
caller = self.function_index.find_containing(call_site)
|
||||
callers.append(
|
||||
{
|
||||
"call_site": call_site,
|
||||
"function": caller,
|
||||
}
|
||||
)
|
||||
callers.sort(key=lambda entry: entry["call_site"])
|
||||
return callers
|
||||
|
||||
def format_callees(self, row: dict[str, object]) -> list[dict[str, object]]:
|
||||
callees: list[dict[str, object]] = []
|
||||
seen: set[tuple[int, int]] = set()
|
||||
for ref in row.get("callrefs", []):
|
||||
if ref.get("type") != "CALL":
|
||||
continue
|
||||
call_site = int(ref["from"])
|
||||
callee_site = int(ref["to"])
|
||||
callee = self.function_index.find_containing(callee_site)
|
||||
if callee is None:
|
||||
continue
|
||||
key = (call_site, int(callee["offset"]))
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
callees.append(
|
||||
{
|
||||
"call_site": call_site,
|
||||
"function": callee,
|
||||
}
|
||||
)
|
||||
callees.sort(key=lambda entry: (int(entry["function"]["offset"]), entry["call_site"]))
|
||||
return callees
|
||||
|
||||
def format_data_refs(self, row: dict[str, object]) -> list[dict[str, object]]:
|
||||
refs: list[dict[str, object]] = []
|
||||
seen: set[tuple[int, int, str]] = set()
|
||||
for ref in row.get("datarefs", []):
|
||||
from_addr = int(ref["from"])
|
||||
to_addr = int(ref["to"])
|
||||
ref_type = str(ref.get("type", "DATA"))
|
||||
key = (from_addr, to_addr, ref_type)
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
refs.append(
|
||||
{
|
||||
"from": from_addr,
|
||||
"to": to_addr,
|
||||
"type": ref_type,
|
||||
"string": self.strings_by_addr.get(to_addr),
|
||||
}
|
||||
)
|
||||
refs.sort(key=lambda entry: (entry["to"], entry["from"]))
|
||||
return refs
|
||||
|
||||
    def build_function_rows(self, targets: list[int]) -> list[dict[str, str]]:
        """Build one CSV row (all string values) per unique target address.

        Raises ValueError when an address cannot be resolved to a function, so
        a mistyped target fails loudly instead of being silently skipped.
        """
        rows: list[dict[str, str]] = []
        # dict.fromkeys() de-duplicates while preserving order; sorted() then
        # fixes a deterministic output order regardless of CLI argument order.
        for query_address in sorted(dict.fromkeys(targets)):
            function = self.resolve_target_function(query_address)
            if function is None:
                raise ValueError(f"no function found for {fmt_addr(query_address)}")

            callers = self.format_callers(function)
            callees = self.format_callees(function)
            data_refs = self.format_data_refs(function)

            rows.append(
                {
                    "query_address": fmt_addr(query_address),
                    "function_address": fmt_addr(int(function["offset"])),
                    # Curated ledger name wins over rizin's auto-generated name.
                    "name": self.function_index.preferred_name(function),
                    "size": str(function["size"]),
                    "calling_convention": str(function.get("calltype", "unknown")),
                    "signature": str(function.get("signature", "")),
                    "caller_count": str(len(callers)),
                    "callers": "; ".join(
                        self.describe_caller(entry["call_site"], entry["function"])
                        for entry in callers
                    ),
                    "callee_count": str(len(callees)),
                    "callees": "; ".join(
                        self.describe_callee(entry["call_site"], entry["function"])
                        for entry in callees
                    ),
                    "data_ref_count": str(len(data_refs)),
                    "data_refs": "; ".join(self.describe_data_ref(entry) for entry in data_refs),
                    # Flatten the multi-line asm excerpt so it stays one CSV cell.
                    "entry_excerpt": self.excerpt(int(function["offset"])).replace("\n", " | "),
                }
            )
        return rows
|
||||
|
||||
    def build_string_rows(self, targets: list[str]) -> list[dict[str, str]]:
        """Build one CSV row per (query, matched string) pair.

        Raises ValueError when a query matches nothing, so typos fail loudly.
        Rows are sorted by query text, then by formatted string address.
        """
        rows: list[dict[str, str]] = []
        for query in targets:
            matches = self.resolve_string_matches(query)
            if not matches:
                raise ValueError(f"no string match found for {query!r}")

            for match_kind, string_entry in matches:
                address = int(string_entry["vaddr"])
                xrefs = self.xrefs_to(address)
                rows.append(
                    {
                        "query_text": query,
                        # "exact" or "substring", per resolve_string_matches().
                        "match_kind": match_kind,
                        "string_address": fmt_addr(address),
                        # Escaped so control characters survive the CSV cell.
                        "string_text": display_string(str(string_entry["string"])),
                        "xref_count": str(len(xrefs)),
                        "xrefs": "; ".join(self.describe_string_xref(entry) for entry in xrefs),
                    }
                )
        rows.sort(key=lambda row: (row["query_text"], row["string_address"]))
        return rows
|
||||
|
||||
def describe_caller(self, call_site: int, function: dict[str, object] | None) -> str:
|
||||
if function is None:
|
||||
return fmt_addr(call_site)
|
||||
return (
|
||||
f"{fmt_addr(call_site)}@{fmt_addr(int(function['offset']))}:"
|
||||
f"{self.function_index.preferred_name(function)}"
|
||||
)
|
||||
|
||||
def describe_callee(self, call_site: int, function: dict[str, object] | None) -> str:
|
||||
if function is None:
|
||||
return fmt_addr(call_site)
|
||||
return (
|
||||
f"{fmt_addr(call_site)}->{fmt_addr(int(function['offset']))}:"
|
||||
f"{self.function_index.preferred_name(function)}"
|
||||
)
|
||||
|
||||
def describe_data_ref(self, entry: dict[str, object]) -> str:
|
||||
target = fmt_addr(int(entry["to"]))
|
||||
string_entry = entry["string"]
|
||||
if string_entry is not None:
|
||||
target += f':"{display_string(str(string_entry["string"]))}"'
|
||||
return f"{fmt_addr(int(entry['from']))}->{target}"
|
||||
|
||||
def describe_string_xref(self, entry: dict[str, object]) -> str:
|
||||
from_addr = int(entry["from"])
|
||||
ref_type = str(entry.get("type", "DATA"))
|
||||
function = self.function_index.find_containing(from_addr)
|
||||
if function is None:
|
||||
return f"{fmt_addr(from_addr)}:{ref_type}"
|
||||
return (
|
||||
f"{fmt_addr(from_addr)}@{fmt_addr(int(function['offset']))}:"
|
||||
f"{self.function_index.preferred_name(function)}:{ref_type}"
|
||||
)
|
||||
|
||||
def write_csv(self, path: Path, rows: list[dict[str, str]]) -> None:
|
||||
if not rows:
|
||||
return
|
||||
with path.open("w", newline="", encoding="utf-8") as handle:
|
||||
writer = csv.DictWriter(handle, fieldnames=list(rows[0].keys()))
|
||||
writer.writeheader()
|
||||
writer.writerows(rows)
|
||||
|
||||
    def write_markdown(self, function_targets: list[int], string_targets: list[str]) -> None:
        """Render the full analysis-context dossier to output_dir/analysis-context.md.

        Sections: a header, one "### ..." block per resolved function target
        (metadata, entry excerpt, callers with excerpts, callees, data refs),
        then one block per matched string target (text, xrefs with excerpts).
        Unresolved function targets are skipped here (build_function_rows is
        the path that raises on them).
        """
        lines = [
            "# Analysis Context",
            "",
            f"- Target binary: `{self.exe_path}`",
            "- Function names prefer the curated ledger when a committed mapping exists.",
            "",
        ]

        if function_targets:
            lines.extend(["## Function Targets", ""])
            # De-duplicate and order targets deterministically.
            for query_address in sorted(dict.fromkeys(function_targets)):
                function = self.resolve_target_function(query_address)
                if function is None:
                    continue
                function_address = int(function["offset"])
                callers = self.format_callers(function)
                callees = self.format_callees(function)
                data_refs = self.format_data_refs(function)

                lines.append(
                    f"### `{fmt_addr(query_address)}` -> `{fmt_addr(function_address)}` `{self.function_index.preferred_name(function)}`"
                )
                lines.append("")
                lines.append(f"- Size: `{function['size']}`")
                lines.append(f"- Calling convention: `{function.get('calltype', 'unknown')}`")
                lines.append(f"- Signature: `{function.get('signature', '')}`")
                lines.append("")
                lines.append("Entry excerpt:")
                lines.append("")
                lines.append("```asm")
                lines.append(self.excerpt(function_address))
                lines.append("```")
                lines.append("")
                lines.append("Callers:")
                for entry in callers:
                    function_row = entry["function"]
                    if function_row is None:
                        # Call site not inside any known function.
                        lines.append(f"- `{fmt_addr(entry['call_site'])}`")
                    else:
                        lines.append(
                            f"- `{fmt_addr(entry['call_site'])}` in `{fmt_addr(int(function_row['offset']))}` `{self.function_index.preferred_name(function_row)}`"
                        )
                if not callers:
                    lines.append("- none")
                lines.append("")
                if callers:
                    # One disassembly excerpt per calling site.
                    lines.append("Caller xref excerpts:")
                    lines.append("")
                    for entry in callers:
                        lines.append(f"#### `{fmt_addr(entry['call_site'])}`")
                        lines.append("")
                        lines.append("```asm")
                        lines.append(self.excerpt(entry["call_site"]))
                        lines.append("```")
                        lines.append("")

                lines.append("Direct internal callees:")
                for entry in callees:
                    # format_callees() only yields resolved functions, so no
                    # None guard is needed here.
                    function_row = entry["function"]
                    lines.append(
                        f"- `{fmt_addr(entry['call_site'])}` -> `{fmt_addr(int(function_row['offset']))}` `{self.function_index.preferred_name(function_row)}`"
                    )
                if not callees:
                    lines.append("- none")
                lines.append("")

                lines.append("Data refs:")
                for entry in data_refs:
                    target = fmt_addr(int(entry["to"]))
                    string_entry = entry["string"]
                    if string_entry is not None:
                        lines.append(
                            f'- `{fmt_addr(int(entry["from"]))}` -> `{target}` "{display_string(str(string_entry["string"]))}"'
                        )
                    else:
                        lines.append(f"- `{fmt_addr(int(entry['from']))}` -> `{target}`")
                if not data_refs:
                    lines.append("- none")
                lines.append("")

        if string_targets:
            lines.extend(["## String Targets", ""])
            for query in string_targets:
                matches = self.resolve_string_matches(query)
                for match_kind, string_entry in matches:
                    address = int(string_entry["vaddr"])
                    xrefs = self.xrefs_to(address)
                    lines.append(
                        f"### `{query}` -> `{fmt_addr(address)}`"
                    )
                    lines.append("")
                    lines.append(f"- Match kind: `{match_kind}`")
                    lines.append(f'- String text: "{display_string(str(string_entry["string"]))}"')
                    lines.append("")
                    lines.append("Xrefs:")
                    for entry in xrefs:
                        from_addr = int(entry["from"])
                        function = self.function_index.find_containing(from_addr)
                        ref_type = str(entry.get("type", "DATA"))
                        if function is None:
                            lines.append(f"- `{fmt_addr(from_addr)}` `{ref_type}`")
                        else:
                            lines.append(
                                f"- `{fmt_addr(from_addr)}` in `{fmt_addr(int(function['offset']))}` `{self.function_index.preferred_name(function)}` `{ref_type}`"
                            )
                    if not xrefs:
                        lines.append("- none")
                    lines.append("")

                    if xrefs:
                        # One disassembly excerpt per referencing site.
                        lines.append("Xref excerpts:")
                        lines.append("")
                        for entry in xrefs:
                            from_addr = int(entry["from"])
                            lines.append(f"#### `{fmt_addr(from_addr)}`")
                            lines.append("")
                            lines.append("```asm")
                            lines.append(self.excerpt(from_addr))
                            lines.append("```")
                            lines.append("")

        (self.output_dir / "analysis-context.md").write_text(
            "\n".join(lines) + "\n",
            encoding="utf-8",
        )
|
||||
|
||||
|
||||
def main() -> int:
    """Export CSV and Markdown context dossiers for the requested targets.

    Returns a process exit status (0 on success). Target resolution failures
    propagate as ValueError from the build_* methods.
    """
    args = parse_args()
    exporter = ContextExporter(args.exe_path, args.output_dir)
    # --addr values are hex strings; --string values are taken verbatim.
    function_targets = [parse_hex(value) for value in args.addr]
    string_targets = list(args.string)

    function_rows = exporter.build_function_rows(function_targets)
    string_rows = exporter.build_string_rows(string_targets)

    # write_csv() is a no-op for an empty row list, so either target kind may
    # be omitted on the command line.
    exporter.write_csv(exporter.output_dir / "analysis-context-functions.csv", function_rows)
    exporter.write_csv(exporter.output_dir / "analysis-context-strings.csv", string_rows)
    exporter.write_markdown(function_targets, string_targets)
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())
|
||||
129
tools/py/export_startup_map.py
Normal file
129
tools/py/export_startup_map.py
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
DEFAULT_DEPTH = "2"
# Roots used when the caller supplies no --root overrides: the PE entry point
# and the bootstrap function, per the addresses committed in this repo.
DEFAULT_ROOTS = [
    ("entry", "0x005a313b"),
    ("bootstrap", "0x00484440"),
]


def parse_args(argv: list[str]) -> tuple[Path, Path, str, list[tuple[str, str]]]:
    """Parse CLI arguments by hand (no argparse).

    Returns (exe_path, output_dir, depth, roots); roots is a list of
    (name, hex-address-string) pairs. Exits with status 2 (after printing a
    message to stderr) on missing positionals, a malformed option, or an
    unknown argument.
    """
    if len(argv) < 3:
        print(
            "usage: export_startup_map.py <exe-path> <output-dir> [--depth N] [--root NAME:ADDR ...]",
            file=sys.stderr,
        )
        raise SystemExit(2)

    exe_path = Path(argv[1]).resolve()
    output_dir = Path(argv[2]).resolve()
    depth = DEFAULT_DEPTH
    roots = list(DEFAULT_ROOTS)
    # BUG FIX: track "user supplied --root" with an explicit flag. The old code
    # tested `roots == DEFAULT_ROOTS` by value, so a user who explicitly passed
    # the two default roots and then a third saw the first two silently reset.
    roots_overridden = False

    index = 3
    while index < len(argv):
        token = argv[index]
        if token == "--depth":
            if index + 1 >= len(argv):
                print("--depth requires a value", file=sys.stderr)
                raise SystemExit(2)
            depth = argv[index + 1]
            index += 2
            continue

        if token == "--root":
            if index + 1 >= len(argv):
                print("--root requires NAME:ADDR", file=sys.stderr)
                raise SystemExit(2)
            root_spec = argv[index + 1]
            if ":" not in root_spec:
                print("--root value must be NAME:ADDR", file=sys.stderr)
                raise SystemExit(2)
            # Split on the first colon only, so addresses may contain colons.
            name, address = root_spec.split(":", 1)
            if not name or not address:
                print("--root value must be NAME:ADDR", file=sys.stderr)
                raise SystemExit(2)
            if not roots_overridden:
                # First explicit --root replaces the default list entirely.
                roots = []
                roots_overridden = True
            roots.append((name, address))
            index += 2
            continue

        print("unknown argument: %s" % token, file=sys.stderr)
        raise SystemExit(2)

    return exe_path, output_dir, depth, roots
|
||||
|
||||
|
||||
def main() -> int:
    """Run Ghidra's analyzeHeadless with the ExportStartupFunctions post-script.

    Returns 0 on success, 1 when analyzeHeadless or java cannot be located.
    A non-zero analyzeHeadless exit propagates as CalledProcessError.
    """
    # Repo root is tools/py/../.. relative to this script.
    repo_root = Path(__file__).resolve().parents[2]
    exe_path, output_dir, depth, roots = parse_args(sys.argv)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Ghidra install location is fixed to ~/software/ghidra by convention.
    ghidra_home = Path.home() / "software" / "ghidra"
    analyze_headless = ghidra_home / "support" / "analyzeHeadless"
    if not analyze_headless.exists():
        print("analyzeHeadless not found at %s" % analyze_headless, file=sys.stderr)
        return 1

    project_dir = repo_root / "ghidra_projects"
    project_dir.mkdir(parents=True, exist_ok=True)
    project_name = "rt3-1.06"
    script_path = repo_root / "tools" / "ghidra" / "scripts"
    # Keep all Ghidra/Java runtime state inside the repo's .ghidra directory so
    # the run does not touch the real user home.
    ghidra_runtime = repo_root / ".ghidra"
    ghidra_home_dir = ghidra_runtime / "home"
    ghidra_config_dir = ghidra_runtime / "config"
    ghidra_cache_dir = ghidra_runtime / "cache"
    ghidra_java_prefs = ghidra_runtime / "java-prefs"
    ghidra_home_dir.mkdir(parents=True, exist_ok=True)
    ghidra_config_dir.mkdir(parents=True, exist_ok=True)
    ghidra_cache_dir.mkdir(parents=True, exist_ok=True)
    ghidra_java_prefs.mkdir(parents=True, exist_ok=True)

    java_bin = shutil.which("java")
    if java_bin is None:
        print("java not found on PATH", file=sys.stderr)
        return 1
    # JAVA_HOME is the directory two levels above .../bin/java.
    java_home = Path(java_bin).resolve().parents[1]

    # analyzeHeadless invocation: import the binary (overwriting any previous
    # import) and run the export script with output dir, depth, and roots as
    # script arguments.
    command = [
        str(analyze_headless),
        str(project_dir),
        project_name,
        "-import",
        str(exe_path),
        "-overwrite",
        "-scriptPath",
        str(script_path),
        "-postScript",
        "ExportStartupFunctions.java",
        str(output_dir),
        depth,
    ]
    command.extend(["%s:%s" % (name, address) for name, address in roots])
    command.extend([
        "-analysisTimeoutPerFile",
        "600",
    ])

    env = os.environ.copy()
    env["HOME"] = str(ghidra_home_dir)
    env["XDG_CONFIG_HOME"] = str(ghidra_config_dir)
    env["XDG_CACHE_HOME"] = str(ghidra_cache_dir)
    env["JAVA_HOME"] = str(java_home)
    # Redirect Java preference storage into the repo-local prefs directory.
    env["_JAVA_OPTIONS"] = "-Djava.util.prefs.userRoot=%s" % ghidra_java_prefs

    subprocess.run(command, check=True, env=env)
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())
|
||||
57
tools/py/rt3_rekit.py
Normal file
57
tools/py/rt3_rekit.py
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from rt3_rekitlib import (
|
||||
PENDING_TEMPLATE_STORE_DEFAULT_SEEDS,
|
||||
export_pending_template_store,
|
||||
parse_hex,
|
||||
)
|
||||
|
||||
|
||||
def build_parser() -> argparse.ArgumentParser:
    """Construct the CLI parser: one required subcommand, pending-template-store."""
    parser = argparse.ArgumentParser(
        description="RT3 CLI RE kit for repeatable branch-deepening exports."
    )
    subcommands = parser.add_subparsers(dest="command", required=True)

    pending_parser = subcommands.add_parser(
        "pending-template-store",
        help="Export the pending-template dispatch-store dossier.",
    )
    # Positional paths are converted to Path objects by argparse.
    for positional in ("exe_path", "output_dir"):
        pending_parser.add_argument(positional, type=Path)
    pending_parser.add_argument(
        "--seed-addr",
        action="append",
        default=[],
        help="Hex seed address. May be repeated.",
    )
    return parser
|
||||
|
||||
|
||||
def main() -> int:
    """CLI entry point; dispatches the selected subcommand.

    Returns 0 on success. Unknown commands exit via argparse's error path.
    """
    parser = build_parser()
    args = parser.parse_args()

    if args.command == "pending-template-store":
        # Fall back to the curated default seed list when no --seed-addr was
        # given; copy the module constant so callers never mutate it.
        seed_addresses = (
            [parse_hex(value) for value in args.seed_addr]
            if args.seed_addr
            else list(PENDING_TEMPLATE_STORE_DEFAULT_SEEDS)
        )
        export_pending_template_store(
            args.exe_path.resolve(),
            args.output_dir.resolve(),
            seed_addresses,
        )
        return 0

    # Defensive guard for future subcommands. parser.error() prints the message
    # and raises SystemExit(2) itself, so the old trailing `return 2` was
    # unreachable and has been removed.
    parser.error(f"unknown command: {args.command}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())
|
||||
543
tools/py/rt3_rekitlib.py
Normal file
543
tools/py/rt3_rekitlib.py
Normal file
|
|
@ -0,0 +1,543 @@
|
|||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
from bisect import bisect_right
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Default seed function addresses for the pending-template dispatch-store
# dossier. All fall in the 0x0059bxxx-0x0059cxxx range covered by the
# adjacency window below.
PENDING_TEMPLATE_STORE_DEFAULT_SEEDS = [
    0x0059B2E0,
    0x0059B710,
    0x0059B740,
    0x0059C470,
    0x0059C540,
    0x0059C590,
    0x0059C5B0,
    0x0059C5E0,
    0x0059C5F0,
]

# Half-open address window [MIN, MAX) used when pulling adjacent
# callers/callees of the seeds into the dossier.
PENDING_TEMPLATE_STORE_ADJACENT_MIN = 0x0059B000
PENDING_TEMPLATE_STORE_ADJACENT_MAX = 0x0059D000
# Function whose rizin case.* flags are parsed for the record-kind table
# (presumed to contain the record destructor's switch — confirm in Ghidra).
DESTRUCTOR_SWITCH_ADDR = 0x0059B2E0
# Address matched against call targets when counting heap free calls
# (presumed heap-free routine — confirm against the binary's CRT map).
HEAP_FREE_ADDR = 0x0058F3C0
|
||||
|
||||
|
||||
def parse_hex(text: str) -> int:
    """Parse a hexadecimal address string, with or without a 0x/0X prefix."""
    cleaned = text.strip().lower().removeprefix("0x")
    return int(cleaned, 16)
|
||||
|
||||
|
||||
def fmt_addr(value: int) -> str:
    """Format an address as lowercase 0x-prefixed hex, zero-padded to 8 digits."""
    return "0x" + format(value, "08x")
|
||||
|
||||
|
||||
def display_string(text: str) -> str:
    """Escape control and non-ASCII characters so the string is safe for CSV/Markdown cells."""
    escaped_bytes = text.encode("unicode_escape")
    return escaped_bytes.decode("ascii")
|
||||
|
||||
|
||||
def clean_json_payload(text: str) -> str:
    """Trim any banner noise preceding the first JSON array or object in *text*.

    Raises ValueError when the output is empty or contains neither '[' nor '{'.
    """
    stripped = text.strip()
    if not stripped:
        raise ValueError("rizin returned empty output")
    candidates = [pos for pos in (stripped.find(marker) for marker in "[{") if pos >= 0]
    if not candidates:
        raise ValueError("rizin did not return JSON")
    return stripped[min(candidates):]
|
||||
|
||||
|
||||
def run_rizin_json(exe_path: Path, command: str) -> object:
    """Run a rizin command against *exe_path* and parse its JSON output.

    Colors are disabled so the output stays machine-parseable; any banner noise
    before the JSON payload is stripped by clean_json_payload(). Raises
    subprocess.CalledProcessError on a non-zero rizin exit and ValueError /
    json.JSONDecodeError when no valid JSON is produced.
    """
    result = subprocess.run(
        [
            "rizin",
            "-q",  # quiet mode: no interactive prompt
            "-e",
            "scr.color=false",  # no ANSI color codes in the output
            "-c",
            command,
            str(exe_path),
        ],
        check=True,
        capture_output=True,
        text=True,
    )
    return json.loads(clean_json_payload(result.stdout))
|
||||
|
||||
|
||||
def run_objdump_excerpt(exe_path: Path, address: int, radius: int = 0x20) -> str:
    """Disassemble a small window around *address* using llvm-objdump.

    radius is the byte span kept on each side of the address (start is clamped
    at 0). Returns only the instruction lines ("<hex>: ..."), dropping section
    headers and symbol labels. Raises CalledProcessError on objdump failure.
    """
    start = max(address - radius, 0)
    stop = address + radius
    result = subprocess.run(
        [
            "llvm-objdump",
            "-d",
            "--no-show-raw-insn",
            f"--start-address={fmt_addr(start)}",
            f"--stop-address={fmt_addr(stop)}",
            str(exe_path),
        ],
        check=True,
        capture_output=True,
        text=True,
    )
    # Keep only lines shaped like "  <hexaddr>: <mnemonic> ...".
    lines = [
        line.rstrip()
        for line in result.stdout.splitlines()
        if re.match(r"^\s*[0-9a-fA-F]+:", line)
    ]
    return "\n".join(lines)
|
||||
|
||||
|
||||
def load_curated_rows(path: Path) -> dict[int, dict[str, str]]:
    """Load the curated function ledger CSV, keyed by parsed address.

    Returns an empty mapping when the ledger file does not exist.
    """
    if not path.exists():
        return {}
    curated: dict[int, dict[str, str]] = {}
    with path.open(newline="", encoding="utf-8") as handle:
        for row in csv.DictReader(handle):
            curated[parse_hex(row["address"])] = dict(row)
    return curated
|
||||
|
||||
|
||||
class FunctionIndex:
|
||||
def __init__(self, rows: list[dict[str, object]], curated_names: dict[int, str]):
|
||||
self.rows = sorted(rows, key=lambda row: int(row["offset"]))
|
||||
self.by_start = {int(row["offset"]): row for row in self.rows}
|
||||
self.starts = [int(row["offset"]) for row in self.rows]
|
||||
self.curated_names = curated_names
|
||||
|
||||
def get_exact(self, address: int) -> dict[str, object] | None:
|
||||
return self.by_start.get(address)
|
||||
|
||||
def find_containing(self, address: int) -> dict[str, object] | None:
|
||||
index = bisect_right(self.starts, address) - 1
|
||||
if index < 0:
|
||||
return None
|
||||
row = self.rows[index]
|
||||
start = int(row["offset"])
|
||||
end = int(row.get("maxbound", start + int(row.get("size", 0))))
|
||||
if start <= address < end:
|
||||
return row
|
||||
return None
|
||||
|
||||
def preferred_name(self, row: dict[str, object]) -> str:
|
||||
start = int(row["offset"])
|
||||
return self.curated_names.get(start, str(row["name"]))
|
||||
|
||||
|
||||
class BranchAnalyzer:
|
||||
    def __init__(self, exe_path: Path, output_dir: Path):
        """Load the curated ledger, rizin function rows, and the string table up front.

        Runs rizin analysis synchronously, so construction can take a while on
        a large binary.
        """
        self.exe_path = exe_path.resolve()
        self.output_dir = output_dir.resolve()
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Curated ledger (if committed in the output dir) overrides rizin's
        # auto-generated names.
        curated_map = self.output_dir / "function-map.csv"
        self.curated_rows = load_curated_rows(curated_map)
        self.curated_names = {
            address: row["name"] for address, row in self.curated_rows.items()
        }
        # NOTE: _load_function_rows() reads self.curated_names, so it must run
        # after the ledger has been loaded above.
        self.function_index = FunctionIndex(
            self._load_function_rows(),
            self.curated_names,
        )
        # izzj: rizin string scan as JSON (presumed to cover the whole binary,
        # not just data sections — confirm against rizin docs).
        self.strings = list(run_rizin_json(self.exe_path, "izzj"))
        self.strings_by_addr = {int(entry["vaddr"]): entry for entry in self.strings}
|
||||
|
||||
    def _load_function_rows(self) -> list[dict[str, object]]:
        """Fetch rizin's function list, forcing definition of curated addresses it missed.

        First pass: "aaa" (full auto-analysis) + "aflj" (function list, JSON).
        If every curated address already appears, that list is returned; else a
        second pass issues an explicit "af @ addr" per missing address before
        re-listing, so the index covers the whole curated ledger.
        """
        rows = list(run_rizin_json(self.exe_path, "aaa; aflj"))
        known_starts = {int(row["offset"]) for row in rows}
        missing_curated = sorted(
            address for address in self.curated_names if address not in known_starts
        )
        if not missing_curated:
            return rows

        define_cmd = "aaa; " + "; ".join(
            f"af @ {fmt_addr(address)}" for address in missing_curated
        ) + "; aflj"
        return list(run_rizin_json(self.exe_path, define_cmd))
|
||||
|
||||
    @lru_cache(maxsize=None)
    def xrefs_to(self, address: int) -> list[dict[str, object]]:
        """Return rizin "axtj" xrefs targeting *address* (cached per analyzer+address).

        NOTE(review): lru_cache on an instance method keys on self and keeps
        the analyzer alive for the process lifetime — acceptable for this
        one-shot CLI, but confirm before reusing in a long-lived service.
        """
        return list(run_rizin_json(self.exe_path, f"aaa; axtj @ {fmt_addr(address)}"))
|
||||
|
||||
    @lru_cache(maxsize=None)
    def function_pdfj(self, address: int) -> dict[str, object]:
        """Return rizin "pdfj" (disassemble function, JSON) for the function at *address*.

        Cached per analyzer+address (note: lru_cache on a method retains self).
        Raises TypeError when rizin returns anything other than a JSON object.
        """
        payload = run_rizin_json(self.exe_path, f"aaa; s {fmt_addr(address)}; pdfj")
        if not isinstance(payload, dict):
            raise TypeError(f"unexpected pdfj payload for {fmt_addr(address)}")
        return payload
|
||||
|
||||
    @lru_cache(maxsize=None)
    def excerpt(self, address: int) -> str:
        """Cached llvm-objdump disassembly window around *address* (see run_objdump_excerpt)."""
        return run_objdump_excerpt(self.exe_path, address)
|
||||
|
||||
def fallback_function(self, address: int) -> dict[str, object] | None:
|
||||
curated = self.curated_rows.get(address)
|
||||
if curated is None:
|
||||
return None
|
||||
|
||||
size = int(curated["size"])
|
||||
return {
|
||||
"offset": address,
|
||||
"name": curated["name"],
|
||||
"size": size,
|
||||
"maxbound": address + size,
|
||||
"calltype": curated["calling_convention"],
|
||||
"signature": "",
|
||||
"codexrefs": self.xrefs_to(address),
|
||||
"callrefs": [],
|
||||
"datarefs": [],
|
||||
"synthetic": True,
|
||||
}
|
||||
|
||||
def resolve_target_function(self, address: int) -> dict[str, object] | None:
|
||||
exact = self.function_index.get_exact(address)
|
||||
if exact is not None:
|
||||
return exact
|
||||
|
||||
fallback = self.fallback_function(address)
|
||||
if fallback is not None:
|
||||
return fallback
|
||||
|
||||
return self.function_index.find_containing(address)
|
||||
|
||||
def format_callers(self, row: dict[str, object]) -> list[dict[str, object]]:
|
||||
callers: list[dict[str, object]] = []
|
||||
for ref in row.get("codexrefs", []):
|
||||
if ref.get("type") != "CALL":
|
||||
continue
|
||||
call_site = int(ref["from"])
|
||||
caller = self.function_index.find_containing(call_site)
|
||||
callers.append({"call_site": call_site, "function": caller})
|
||||
callers.sort(key=lambda entry: entry["call_site"])
|
||||
return callers
|
||||
|
||||
def format_callees(self, row: dict[str, object]) -> list[dict[str, object]]:
|
||||
callees: list[dict[str, object]] = []
|
||||
seen: set[tuple[int, int]] = set()
|
||||
for ref in row.get("callrefs", []):
|
||||
if ref.get("type") != "CALL":
|
||||
continue
|
||||
call_site = int(ref["from"])
|
||||
callee_site = int(ref["to"])
|
||||
callee = self.function_index.find_containing(callee_site)
|
||||
if callee is None:
|
||||
continue
|
||||
key = (call_site, int(callee["offset"]))
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
callees.append({"call_site": call_site, "function": callee})
|
||||
callees.sort(key=lambda entry: (int(entry["function"]["offset"]), entry["call_site"]))
|
||||
return callees
|
||||
|
||||
def format_data_refs(self, row: dict[str, object]) -> list[dict[str, object]]:
|
||||
refs: list[dict[str, object]] = []
|
||||
seen: set[tuple[int, int, str]] = set()
|
||||
for ref in row.get("datarefs", []):
|
||||
from_addr = int(ref["from"])
|
||||
to_addr = int(ref["to"])
|
||||
ref_type = str(ref.get("type", "DATA"))
|
||||
key = (from_addr, to_addr, ref_type)
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
refs.append(
|
||||
{
|
||||
"from": from_addr,
|
||||
"to": to_addr,
|
||||
"type": ref_type,
|
||||
"string": self.strings_by_addr.get(to_addr),
|
||||
}
|
||||
)
|
||||
refs.sort(key=lambda entry: (entry["to"], entry["from"]))
|
||||
return refs
|
||||
|
||||
def describe_caller(self, call_site: int, function: dict[str, object] | None) -> str:
|
||||
if function is None:
|
||||
return fmt_addr(call_site)
|
||||
return (
|
||||
f"{fmt_addr(call_site)}@{fmt_addr(int(function['offset']))}:"
|
||||
f"{self.function_index.preferred_name(function)}"
|
||||
)
|
||||
|
||||
def describe_callee(self, call_site: int, function: dict[str, object] | None) -> str:
|
||||
if function is None:
|
||||
return fmt_addr(call_site)
|
||||
return (
|
||||
f"{fmt_addr(call_site)}->{fmt_addr(int(function['offset']))}:"
|
||||
f"{self.function_index.preferred_name(function)}"
|
||||
)
|
||||
|
||||
def describe_data_ref(self, entry: dict[str, object]) -> str:
|
||||
target = fmt_addr(int(entry["to"]))
|
||||
string_entry = entry["string"]
|
||||
if string_entry is not None:
|
||||
target += f':"{display_string(str(string_entry["string"]))}"'
|
||||
return f"{fmt_addr(int(entry['from']))}->{target}"
|
||||
|
||||
def collect_key_constants(self, pdfj: dict[str, object]) -> list[str]:
|
||||
values: list[int] = []
|
||||
seen: set[int] = set()
|
||||
for op in pdfj.get("ops", []):
|
||||
for key in ("val", "ptr"):
|
||||
raw = op.get(key)
|
||||
if not isinstance(raw, int):
|
||||
continue
|
||||
if raw < 0x10 or raw > 0x100000:
|
||||
continue
|
||||
if 0x00400000 <= raw <= 0x01000000:
|
||||
continue
|
||||
if raw in seen:
|
||||
continue
|
||||
seen.add(raw)
|
||||
values.append(raw)
|
||||
return [fmt_addr(value) for value in values[:10]]
|
||||
|
||||
def collect_key_strings(self, data_refs: list[dict[str, object]]) -> list[str]:
|
||||
strings: list[str] = []
|
||||
seen: set[str] = set()
|
||||
for entry in data_refs:
|
||||
string_entry = entry["string"]
|
||||
if string_entry is None:
|
||||
continue
|
||||
text = display_string(str(string_entry["string"]))
|
||||
if text in seen:
|
||||
continue
|
||||
seen.add(text)
|
||||
strings.append(text)
|
||||
return strings[:8]
|
||||
|
||||
    def discover_adjacent_functions(self, addresses: list[int]) -> list[int]:
        """Expand the seed set with callers/callees inside the store's address window.

        Only one hop is taken from each seed (neighbors of neighbors are not
        explored), and only neighbors whose function start lies in
        [PENDING_TEMPLATE_STORE_ADJACENT_MIN, PENDING_TEMPLATE_STORE_ADJACENT_MAX)
        are added. Returns the sorted union of seeds and discovered starts.
        """
        discovered = set(addresses)
        for address in addresses:
            function = self.resolve_target_function(address)
            if function is None:
                continue
            for entry in self.format_callers(function):
                caller = entry["function"]
                if caller is None:
                    # Call site not inside any known function.
                    continue
                caller_start = int(caller["offset"])
                if PENDING_TEMPLATE_STORE_ADJACENT_MIN <= caller_start < PENDING_TEMPLATE_STORE_ADJACENT_MAX:
                    discovered.add(caller_start)
            for entry in self.format_callees(function):
                # format_callees() only yields resolved functions, so no None
                # guard is needed here.
                callee = entry["function"]
                callee_start = int(callee["offset"])
                if PENDING_TEMPLATE_STORE_ADJACENT_MIN <= callee_start < PENDING_TEMPLATE_STORE_ADJACENT_MAX:
                    discovered.add(callee_start)
        return sorted(discovered)
|
||||
|
||||
    def build_function_rows(self, addresses: list[int]) -> list[dict[str, str]]:
        """Build one CSV row per seed/adjacent function.

        The seed set is first expanded via discover_adjacent_functions().
        Unresolved addresses are skipped (not raised) — presumably because
        adjacency discovery can surface addresses without a resolvable row.
        """
        rows: list[dict[str, str]] = []
        for query_address in self.discover_adjacent_functions(addresses):
            function = self.resolve_target_function(query_address)
            if function is None:
                continue
            callers = self.format_callers(function)
            callees = self.format_callees(function)
            data_refs = self.format_data_refs(function)
            pdfj = self.function_pdfj(int(function["offset"]))
            rows.append(
                {
                    "query_address": fmt_addr(query_address),
                    "function_address": fmt_addr(int(function["offset"])),
                    # Curated ledger name wins over rizin's auto-generated name.
                    "name": self.function_index.preferred_name(function),
                    "size": str(function["size"]),
                    "calling_convention": str(function.get("calltype", "unknown")),
                    "caller_count": str(len(callers)),
                    "callers": "; ".join(
                        self.describe_caller(entry["call_site"], entry["function"])
                        for entry in callers
                    ),
                    "callee_count": str(len(callees)),
                    "callees": "; ".join(
                        self.describe_callee(entry["call_site"], entry["function"])
                        for entry in callees
                    ),
                    "data_ref_count": str(len(data_refs)),
                    "data_refs": "; ".join(self.describe_data_ref(entry) for entry in data_refs),
                    "key_constants": "; ".join(self.collect_key_constants(pdfj)),
                    "key_strings": "; ".join(self.collect_key_strings(data_refs)),
                    # Flatten the multi-line asm excerpt so it stays one CSV cell.
                    "entry_excerpt": self.excerpt(int(function["offset"])).replace("\n", " | "),
                }
            )
        return rows
|
||||
|
||||
    def _case_groups(self, switch_address: int) -> list[dict[str, object]]:
        """Group the ops of the function at *switch_address* by rizin case.* flags.

        Each group is {start, cases, ops}: the case labels attached to its
        first op, plus every subsequent op until the next labeled op. Ops
        appearing before the first labeled op are dropped (current stays None
        until a label is seen).
        """
        pdfj = self.function_pdfj(switch_address)
        groups: list[dict[str, object]] = []
        current: dict[str, object] | None = None
        # rizin labels switch targets "case.0x<table>.<index>" and the default
        # branch "case.default.0x<addr>".
        case_pattern = re.compile(r"^case\.0x[0-9a-f]+\.(\d+)$")
        default_pattern = re.compile(r"^case\.default\.0x[0-9a-f]+$")
        for op in pdfj.get("ops", []):
            labels: list[str] = []
            for flag in op.get("flags", []):
                match = case_pattern.match(flag)
                if match:
                    # Numeric case index becomes the label.
                    labels.append(match.group(1))
                    continue
                if default_pattern.match(flag):
                    labels.append("default")
            if labels:
                # A labeled op starts a new group (several labels may share it).
                current = {
                    "start": int(op["offset"]),
                    "cases": labels,
                    "ops": [op],
                }
                groups.append(current)
                continue
            if current is not None:
                current["ops"].append(op)
        return groups
|
||||
|
||||
    def _infer_case_shape(self, ops: list[dict[str, object]]) -> tuple[str, str, str]:
        """Heuristically classify one destructor case's payload layout.

        Returns (shape, freed_fields, notes): a shape label, a comma-joined
        list of freed field descriptions, and a notes string bundling the
        heap-free count with a short disassembly excerpt.
        """
        disasm_lines = [str(op["disasm"]) for op in ops]
        text = "\n".join(disasm_lines)
        # Offsets dereferenced off a base register within this case.
        direct_offsets = {
            int(match.group(1), 16)
            for match in re.finditer(r"\[(?:edi|eax|ecx)\+0x([0-9a-f]+)\]", text)
        }
        # NOTE(review): the '"*4" in text' condition below tests the WHOLE case
        # text, not the matched line, so indexed_offsets equals direct_offsets
        # whenever any scaled index appears anywhere in the case — confirm this
        # is the intended behavior.
        indexed_offsets = {
            int(match.group(1), 16)
            for match in re.finditer(r"\[(?:edi|eax|ecx)\+0x([0-9a-f]+)\]", text)
            if "*4" in text
        }
        # Count calls to the heap free routine, matched either by resolved jump
        # target or by rizin's fallback "fcn." name in the disasm text.
        free_calls = sum(
            1
            for op in ops
            if op.get("jump") == HEAP_FREE_ADDR or "fcn.0058f3c0" in str(op.get("disasm", ""))
        )
        # A scaled-index addressing mode ("*4") is taken as evidence of a loop
        # over a pointer vector.
        has_loop = any("*4" in line for line in disasm_lines)

        fields = ["top-level payload"]
        for offset in sorted(direct_offsets):
            fields.append(f"payload+0x{offset:02x}")
        if has_loop:
            for offset in sorted(indexed_offsets):
                fields.append(f"vector@payload+0x{offset:02x}")

        # De-duplicate while preserving first-seen order.
        unique_fields = []
        seen_fields: set[str] = set()
        for field in fields:
            if field in seen_fields:
                continue
            seen_fields.add(field)
            unique_fields.append(field)

        # Shape heuristics, most specific first.
        if has_loop and len(direct_offsets) >= 3:
            shape = "pointer vectors with paired side tables"
        elif has_loop:
            shape = "indexed pointer vector"
        elif len(direct_offsets) >= 4:
            shape = "fixed pointer tuple"
        elif len(direct_offsets) >= 2:
            shape = "paired nested pointers"
        elif free_calls <= 1:
            shape = "top-level payload pointer"
        else:
            shape = "single nested pointer"

        cleanup_summary = f"{free_calls} heap free call(s)"
        excerpt = " | ".join(disasm_lines[:8])
        return shape, ", ".join(unique_fields), cleanup_summary + f"; {excerpt}"
|
||||
|
||||
def build_record_kind_rows(self) -> list[dict[str, str]]:
    """Build one CSV row per destructor-switch case label.

    Rows are sorted by numeric case label, with the ``default`` label last.
    """
    owning_function = fmt_addr(DESTRUCTOR_SWITCH_ADDR)
    owning_name = self.curated_names.get(
        DESTRUCTOR_SWITCH_ADDR,
        "multiplayer_transport_destroy_pending_template_dispatch_record",
    )

    rows: list[dict[str, str]] = []
    for group in self._case_groups(DESTRUCTOR_SWITCH_ADDR):
        shape, freed_fields, notes = self._infer_case_shape(group["ops"])
        joined_cases = ",".join(group["cases"])
        rows.extend(
            {
                "record_kind": case_label,
                "case_group": joined_cases,
                "owning_function": owning_function,
                "owning_name": owning_name,
                "inferred_payload_shape": shape,
                "freed_fields": freed_fields,
                "notes": notes,
            }
            for case_label in group["cases"]
        )

    def sort_key(row: dict[str, str]) -> tuple[bool, int]:
        # Numeric labels ascend first; the "default" label sorts after them.
        label = row["record_kind"]
        if label == "default":
            return (True, 0)
        return (False, int(label))

    rows.sort(key=sort_key)
    return rows
|
||||
|
||||
def write_csv(self, path: Path, rows: list[dict[str, str]]) -> None:
    """Write ``rows`` to ``path`` as UTF-8 CSV; no-op when ``rows`` is empty.

    The header comes from the key order of the first row, so every row is
    expected to share the same keys.
    """
    if rows:
        header = list(rows[0])
        with path.open("w", newline="", encoding="utf-8") as out:
            table = csv.DictWriter(out, fieldnames=header)
            table.writeheader()
            table.writerows(rows)
|
||||
|
||||
def write_pending_template_store_markdown(self, function_rows: list[dict[str, str]]) -> None:
    """Render the pending-template store report as markdown in the output dir.

    Args:
        function_rows: rows from the function export; each is keyed here by
            its parsed ``function_address``.  Addresses listed in a section
            but missing from ``function_rows`` are silently skipped.
    """
    by_address = {parse_hex(row["function_address"]): row for row in function_rows}
    # Section title -> curated function addresses, in presentation order.
    sections = {
        "Init": [0x0059B710, 0x0059C5B0],
        "Destroy": [0x0059B2E0, 0x0059B740, 0x0059C5E0],
        "Lookup": [0x0059C540, 0x0059C590],
        "Prune / Remove": [0x0059C470],
        "Dispatch / Update": [0x0059C220, 0x0059C5F0],
    }

    lines = [
        "# Pending-Template Store Management",
        "",
        f"- Target binary: `{self.exe_path}`",
        "- Scope: companion pending-template dispatch store and its adjacent management helpers.",
        "",
    ]

    for title, addresses in sections.items():
        lines += [f"## {title}", ""]
        for address in addresses:
            row = by_address.get(address)
            if row is None:
                continue
            lines += [
                f"### `{row['function_address']}` `{row['name']}`",
                "",
                f"- Size: `{row['size']}`",
                f"- Calling convention: `{row['calling_convention']}`",
                f"- Callers: {row['callers'] or 'none'}",
                f"- Direct callees: {row['callees'] or 'none'}",
                f"- Data refs: {row['data_refs'] or 'none'}",
                f"- Key constants: {row['key_constants'] or 'none'}",
                f"- Key strings: {row['key_strings'] or 'none'}",
                "",
                "Entry excerpt:",
                "",
                "```asm",
                self.excerpt(address),
                "```",
                "",
            ]

    report_path = self.output_dir / "pending-template-store-management.md"
    report_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
|
||||
|
||||
|
||||
def export_pending_template_store(
    exe_path: Path,
    output_dir: Path,
    seed_addresses: list[int],
) -> None:
    """Run the full pending-template store export.

    Produces two CSVs (function metadata and destructor record kinds) plus a
    markdown report under ``output_dir``.
    """
    analyzer = BranchAnalyzer(exe_path, output_dir)
    function_rows = analyzer.build_function_rows(seed_addresses)
    record_rows = analyzer.build_record_kind_rows()

    for filename, rows in (
        ("pending-template-store-functions.csv", function_rows),
        ("pending-template-store-record-kinds.csv", record_rows),
    ):
        analyzer.write_csv(output_dir / filename, rows)

    analyzer.write_pending_template_store_markdown(function_rows)
|
||||
29
tools/run_hook_smoke_test.sh
Executable file
29
tools/run_hook_smoke_test.sh
Executable file
|
|
@ -0,0 +1,29 @@
|
|||
#!/usr/bin/env bash
# Smoke test for the rrt-hook proxy DLL:
#   1. build the 32-bit Windows DLL,
#   2. install it as dinput8.dll in the wine game directory,
#   3. run RT3.exe briefly under wine,
#   4. print the attach log (or fail if the hook never wrote one).
set -euo pipefail

repo_root="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
rt3_dir="$repo_root/rt3_wineprefix/drive_c/rt3"
attach_log="$rt3_dir/rrt_hook_attach.log"
proxy_dll="$rt3_dir/dinput8.dll"

# Bring cargo onto PATH (rustup env file under the XDG data dir).
. "$HOME/.local/share/cargo/env"

cargo build -p rrt-hook --target i686-pc-windows-gnu

# Clear any stale log, then install a fresh proxy DLL.
rm -f "$attach_log"
cp "$repo_root/target/i686-pc-windows-gnu/debug/dinput8.dll" "$proxy_dll"

# Launch the game for at most 8 seconds; wine chatter goes to a scratch log.
# The run itself may fail or time out -- only the attach log matters below.
(
  cd "$rt3_dir"
  timeout 8s env \
    WINEPREFIX="$repo_root/rt3_wineprefix" \
    WINEDLLOVERRIDES="dinput8=n,b" \
    /opt/wine-stable/bin/wine RT3.exe
) >/tmp/rrt-hook-wine.log 2>&1 || true

# Success criterion: the hook produced its attach log.
if [[ ! -f "$attach_log" ]]; then
  echo "attach-log-missing" >&2
  exit 1
fi
cat "$attach_log"
|
||||
Loading…
Add table
Add a link
Reference in a new issue