Add save-side unclassified collection scanner

This commit is contained in:
Jan Petykiewicz 2026-04-18 11:16:28 -07:00
commit a4fd4f099d
2 changed files with 248 additions and 3 deletions

View file

@ -1,4 +1,4 @@
use std::collections::BTreeMap;
use std::collections::{BTreeMap, BTreeSet};
use std::fs;
use std::path::Path;
use std::sync::OnceLock;
@ -1616,6 +1616,34 @@ pub struct SmpSaveTaggedCollectionHeaderProbe {
pub evidence: Vec<String>,
}
/// One candidate tagged collection found by the generic save-side scan
/// (`scan_save_unclassified_tagged_collection_header_probes`) whose metadata tag is not
/// claimed by one of the dedicated collection probes.
///
/// Every `*_hex` field is the `0x`-prefixed, zero-padded 8-digit lowercase hex rendering of
/// the numeric field it sits next to. All `*_offset` fields are byte offsets into the
/// scanned buffer.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SmpSaveUnclassifiedTaggedCollectionHeaderProbe {
/// `profile_family` of the container profile the scan ran under.
pub profile_family: String,
/// Fixed label set by the scanner to identify which probe produced this entry.
pub source_kind: String,
/// Fixed label set by the scanner; this probe kind does not assign a concrete family.
pub semantic_family: String,
/// Little-endian u32 read at `metadata_tag_offset`; the scanner only accepts values
/// `<= 0xffff` that are not in its list of already-known metadata tags.
pub metadata_tag: u32,
pub metadata_tag_hex: String,
/// Expected records tag, always `metadata_tag + 1`.
pub records_tag: u32,
pub records_tag_hex: String,
/// Expected close tag, always `metadata_tag + 2`.
pub close_tag: u32,
pub close_tag_hex: String,
/// Offset of the metadata tag dword.
pub metadata_tag_offset: usize,
/// Offset of the first `records_tag` occurrence the scanner found after the metadata tag.
pub records_tag_offset: usize,
/// Offset of the first `close_tag` occurrence found after the records tag dword.
pub close_tag_offset: usize,
/// Number of bytes between the end of the records tag dword and the close tag
/// (`close_tag_offset - (records_tag_offset + 4)`); never zero for a reported probe.
pub records_span_len: usize,
/// Header word 0; the scanner only accepts `0` or `1`.
pub direct_collection_flag: u32,
pub direct_collection_flag_hex: String,
/// Header word 1; accepted range is `1..=0x4000`.
pub direct_record_stride: u32,
pub direct_record_stride_hex: String,
/// Header word 4; accepted range is `1..=0x100000`.
pub live_id_bound: u32,
pub live_id_bound_hex: String,
/// Header word 5; non-zero and never greater than `live_id_bound`.
pub live_record_count: u32,
pub live_record_count_hex: String,
/// All header dwords read immediately after the metadata tag, in file order.
pub header_words: Vec<u32>,
/// `header_words` rendered as hex strings, element for element.
pub header_hex_words: Vec<String>,
/// Human-readable notes explaining why the scanner reported this candidate.
pub evidence: Vec<String>,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SmpSaveTrainCollectionDirectoryEntryProbe {
pub live_entry_id: u32,
@ -2691,6 +2719,8 @@ pub struct SmpSaveCompanyChairmanAnalysisReport {
#[serde(default)]
pub placed_structure_record_triplets: Option<SmpSavePlacedStructureRecordTripletProbe>,
#[serde(default)]
pub unclassified_tagged_collection_headers: Vec<SmpSaveUnclassifiedTaggedCollectionHeaderProbe>,
#[serde(default)]
pub company_entries: Vec<SmpSaveCompanyRecordAnalysisEntry>,
#[serde(default)]
pub chairman_entries: Vec<SmpSaveChairmanRecordAnalysisEntry>,
@ -2957,6 +2987,9 @@ pub struct SmpInspectionReport {
pub save_placed_structure_record_triplet_probe:
Option<SmpSavePlacedStructureRecordTripletProbe>,
#[serde(default)]
pub save_unclassified_tagged_collection_header_probes:
Vec<SmpSaveUnclassifiedTaggedCollectionHeaderProbe>,
#[serde(default)]
pub save_company_roster_probe: Option<SmpLoadedCompanyRoster>,
#[serde(default)]
pub save_chairman_profile_table_probe: Option<SmpLoadedChairmanProfileTable>,
@ -3369,6 +3402,8 @@ pub fn inspect_save_company_and_chairman_analysis_bytes(
let region_record_triplets = report.save_region_record_triplet_probe.clone();
let placed_structure_record_triplets =
report.save_placed_structure_record_triplet_probe.clone();
let unclassified_tagged_collection_headers =
report.save_unclassified_tagged_collection_header_probes.clone();
let company_header_probe = report.save_company_collection_header_probe.as_ref();
let chairman_header_probe = report
.save_chairman_profile_collection_header_probe
@ -3726,6 +3761,18 @@ pub fn inspect_save_company_and_chairman_analysis_bytes(
triplets.entries.first().map(|entry| entry.profile_status_kind.as_str())
));
}
if let Some(candidate) = unclassified_tagged_collection_headers.first() {
notes.push(format!(
"Generic save-side tagged collection scan also found {} unclassified candidate families; largest current candidate uses tags {}/{}/{} with live_record_count={} stride=0x{:x} records_span_len=0x{:x}.",
unclassified_tagged_collection_headers.len(),
candidate.metadata_tag_hex,
candidate.records_tag_hex,
candidate.close_tag_hex,
candidate.live_record_count,
candidate.direct_record_stride,
candidate.records_span_len
));
}
if !company_entries.is_empty() {
notes.push(
"Company debt is derived from the grounded bond table at [company+0x5b/+0x5f] by summing live principal slots.".to_string(),
@ -3775,6 +3822,7 @@ pub fn inspect_save_company_and_chairman_analysis_bytes(
.save_placed_structure_collection_header_probe
.clone(),
placed_structure_record_triplets,
unclassified_tagged_collection_headers,
company_entries,
chairman_entries,
notes,
@ -7800,6 +7848,12 @@ fn inspect_bundle_bytes(bytes: &[u8], file_extension_hint: Option<String>) -> Sm
bytes,
save_placed_structure_collection_header_probe.as_ref(),
);
let save_unclassified_tagged_collection_header_probes =
scan_save_unclassified_tagged_collection_header_probes(
bytes,
file_extension_hint.as_deref(),
container_profile.as_ref(),
);
let save_company_roster_probe = parse_save_company_roster_probe(
bytes,
save_company_collection_header_probe.as_ref(),
@ -7969,6 +8023,7 @@ fn inspect_bundle_bytes(bytes: &[u8], file_extension_hint: Option<String>) -> Sm
save_region_record_triplet_probe,
save_placed_structure_collection_header_probe,
save_placed_structure_record_triplet_probe,
save_unclassified_tagged_collection_header_probes,
save_company_roster_probe,
save_chairman_profile_table_probe,
rt3_105_save_name_table_probe,
@ -10470,6 +10525,150 @@ fn parse_save_tagged_collection_header_probe(
})
}
/// Scans save bytes for indexed-collection headers whose metadata tag is not yet claimed
/// by one of the dedicated collection probes.
///
/// The scan only runs for `gms` files whose container profile is one of the recognised
/// RT3 save families; any other input yields an empty vector.
///
/// At every byte offset the function reads a candidate metadata tag (a u32 `<= 0xffff`
/// that is not in the known-tag list), decodes the header dwords that follow it, and keeps
/// the candidate only when:
///
/// * header word 0 (direct collection flag) is `0` or `1`,
/// * header word 1 (record stride) is in `1..=0x4000`,
/// * header word 4 (live id bound) is in `1..=0x100000`,
/// * header word 5 (live record count) is in `1..=live_id_bound`,
/// * the tag `metadata_tag + 1` occurs after the decoded header, and the tag
///   `metadata_tag + 2` occurs after that with at least one byte in between.
///
/// The result is sorted by descending live record count, then ascending metadata tag, then
/// ascending metadata offset, and capped at 32 entries.
fn scan_save_unclassified_tagged_collection_header_probes(
    bytes: &[u8],
    file_extension_hint: Option<&str>,
    container_profile: Option<&SmpContainerProfile>,
) -> Vec<SmpSaveUnclassifiedTaggedCollectionHeaderProbe> {
    // Cap on the number of candidates reported after ranking.
    const MAX_REPORTED_PROBES: usize = 32;
    // Byte length of the header dwords decoded right after the metadata tag.
    const DECODED_HEADER_BYTE_LEN: usize = INDEXED_COLLECTION_SERIALIZED_HEADER_DWORD_COUNT * 4;

    if file_extension_hint != Some("gms") {
        return Vec::new();
    }
    let Some(profile) = container_profile else {
        return Vec::new();
    };
    if !matches!(
        profile.profile_family.as_str(),
        "rt3-classic-save-container-v1"
            | "rt3-105-save-container-v1"
            | "rt3-105-scenario-save-container-v1"
            | "rt3-105-alt-save-container-v1"
    ) {
        return Vec::new();
    }

    // Metadata tags that already have a dedicated probe and must not be reported again.
    let known_metadata_tags = BTreeSet::from([
        RT3_SAVE_WORLD_BLOCK_CHUNK_TAG,
        0x000061a9,
        0x00005209,
        0x000036b1,
        EVENT_RUNTIME_COLLECTION_METADATA_TAG as u32,
    ]);

    let mut probes: Vec<SmpSaveUnclassifiedTaggedCollectionHeaderProbe> = Vec::new();
    let scan_end = bytes
        .len()
        .saturating_sub(INDEXED_COLLECTION_SERIALIZED_HEADER_LEN + 4);
    for metadata_tag_offset in 0..scan_end {
        let Some(metadata_tag) = read_u32_at(bytes, metadata_tag_offset) else {
            continue;
        };
        if metadata_tag > 0xffff || known_metadata_tags.contains(&metadata_tag) {
            continue;
        }

        // Decode the header dwords that immediately follow the metadata tag.
        let header_start = metadata_tag_offset + 4;
        let mut header_words = [0u32; INDEXED_COLLECTION_SERIALIZED_HEADER_DWORD_COUNT];
        let header_is_readable = header_words.iter_mut().enumerate().all(|(index, word)| {
            read_u32_at(bytes, header_start + index * 4)
                .map(|value| *word = value)
                .is_some()
        });
        if !header_is_readable {
            continue;
        }

        let direct_collection_flag = header_words[0];
        let direct_record_stride = header_words[1];
        let live_id_bound = header_words[4];
        let live_record_count = header_words[5];
        let header_is_plausible = matches!(direct_collection_flag, 0 | 1)
            && (1..=0x4000).contains(&direct_record_stride)
            && (1..=0x100000).contains(&live_id_bound)
            && (1..=live_id_bound).contains(&live_record_count);
        if !header_is_plausible {
            continue;
        }

        // `metadata_tag <= 0xffff`, so neither addition can overflow.
        let records_tag = metadata_tag + 1;
        let close_tag = metadata_tag + 2;

        // Start looking for the records tag only after the header that was just decoded.
        // Searching from inside the header would let a header word (for example a stride
        // that happens to equal `metadata_tag + 1`) be mistaken for the records tag.
        let records_search_start = header_start + DECODED_HEADER_BYTE_LEN;
        let Some(records_tag_offset) = bytes
            .get(records_search_start..)
            .and_then(|tail| find_u32_le_offsets(tail, records_tag).into_iter().next())
            .map(|relative| records_search_start + relative)
        else {
            continue;
        };

        let close_search_start = records_tag_offset + 4;
        let Some(close_tag_offset) = bytes
            .get(close_search_start..)
            .and_then(|tail| find_u32_le_offsets(tail, close_tag).into_iter().next())
            .map(|relative| close_search_start + relative)
        else {
            continue;
        };

        let records_span_len = close_tag_offset.saturating_sub(records_tag_offset + 4);
        if records_span_len == 0 {
            continue;
        }

        // No duplicate check is needed: each `metadata_tag_offset` is visited exactly once,
        // so two probes can never share the same (metadata, records, close) offset triple.
        probes.push(SmpSaveUnclassifiedTaggedCollectionHeaderProbe {
            profile_family: profile.profile_family.clone(),
            source_kind: "save-unclassified-tagged-header-counts".to_string(),
            semantic_family: "scenario-save-unclassified-tagged-header-counts".to_string(),
            metadata_tag,
            metadata_tag_hex: format!("0x{metadata_tag:08x}"),
            records_tag,
            records_tag_hex: format!("0x{records_tag:08x}"),
            close_tag,
            close_tag_hex: format!("0x{close_tag:08x}"),
            metadata_tag_offset,
            records_tag_offset,
            close_tag_offset,
            records_span_len,
            direct_collection_flag,
            direct_collection_flag_hex: format!("0x{direct_collection_flag:08x}"),
            direct_record_stride,
            direct_record_stride_hex: format!("0x{direct_record_stride:08x}"),
            live_id_bound,
            live_id_bound_hex: format!("0x{live_id_bound:08x}"),
            live_record_count,
            live_record_count_hex: format!("0x{live_record_count:08x}"),
            header_words: header_words.to_vec(),
            header_hex_words: header_words
                .iter()
                .map(|word| format!("0x{word:08x}"))
                .collect(),
            evidence: vec![
                "generic save-side tagged collection scan over plausible low u32 metadata tags not yet claimed by the checked-in collection probes".to_string(),
                "candidate uses adjacent metadata/records/close tags with a header that matches the grounded indexed-collection shape (flag, stride, live_id_bound, live_record_count)".to_string(),
            ],
        });
    }

    // Largest collections first; tag and offset break ties so the order is deterministic.
    probes.sort_by(|left, right| {
        right
            .live_record_count
            .cmp(&left.live_record_count)
            .then_with(|| left.metadata_tag.cmp(&right.metadata_tag))
            .then_with(|| left.metadata_tag_offset.cmp(&right.metadata_tag_offset))
    });
    probes.truncate(MAX_REPORTED_PROBES);
    probes
}
fn parse_save_len_prefixed_ascii_name(bytes: &[u8]) -> Option<String> {
let len = *bytes.first()? as usize;
let text_bytes = bytes.get(1..1 + len)?;
@ -18099,6 +18298,48 @@ mod tests {
assert_eq!(probe.live_record_count, 0x7ea);
}
/// A synthetic image with adjacent low tags 0x7001/0x7002/0x7003 and a plausible
/// indexed-collection header must be reported by the unclassified-collection scanner.
#[test]
fn scans_unclassified_tagged_collection_header_probe_from_adjacent_low_tags() {
    // Writes one little-endian dword into the synthetic image.
    fn put_u32_le(image: &mut [u8], at: usize, value: u32) {
        image[at..at + 4].copy_from_slice(&value.to_le_bytes());
    }

    const METADATA_AT: usize = 0x40;
    const RECORDS_AT: usize = 0x140;
    const CLOSE_AT: usize = 0x1c0;

    let mut image = vec![0u8; 0x400];
    put_u32_le(&mut image, METADATA_AT, 0x0000_7001);
    put_u32_le(&mut image, RECORDS_AT, 0x0000_7002);
    put_u32_le(&mut image, CLOSE_AT, 0x0000_7003);

    // Only the leading header dwords are non-zero; the remaining header dwords keep the
    // zero fill the image was created with.
    let leading_header_words: [u32; 6] = [0, 0x12, 0x0a, 0x14, 0x900, 0x808];
    for (slot, value) in leading_header_words.iter().copied().enumerate() {
        put_u32_le(&mut image, METADATA_AT + 4 + slot * 4, value);
    }

    let profile = SmpContainerProfile {
        profile_family: "rt3-105-save-container-v1".to_string(),
        profile_evidence: Vec::new(),
        is_known_profile: true,
    };
    let found = scan_save_unclassified_tagged_collection_header_probes(
        &image,
        Some("gms"),
        Some(&profile),
    );

    let hit = found
        .iter()
        .find(|candidate| candidate.metadata_tag == 0x7001)
        .expect("should include synthetic unclassified tag family");
    assert_eq!(hit.records_tag, 0x7002);
    assert_eq!(hit.close_tag, 0x7003);
    assert_eq!(hit.direct_record_stride, 0x12);
    assert_eq!(hit.live_id_bound, 0x900);
    assert_eq!(hit.live_record_count, 0x808);
    assert_eq!(hit.records_span_len, CLOSE_AT - (RECORDS_AT + 4));
}
#[test]
fn parses_save_company_roster_probe_from_direct_records() {
let metadata_tag_offset = 0x40usize;