Add save-side unclassified collection scanner

This commit is contained in:
Jan Petykiewicz 2026-04-18 11:16:28 -07:00
commit a4fd4f099d
2 changed files with 248 additions and 3 deletions

View file

@ -1,4 +1,4 @@
use std::collections::BTreeMap;
use std::collections::{BTreeMap, BTreeSet};
use std::fs;
use std::path::Path;
use std::sync::OnceLock;
@ -1616,6 +1616,34 @@ pub struct SmpSaveTaggedCollectionHeaderProbe {
pub evidence: Vec<String>,
}
/// One candidate emitted by `scan_save_unclassified_tagged_collection_header_probes`: a
/// metadata/records/close tag triple whose header dwords passed the scanner's plausibility
/// checks but whose metadata tag is not in the scanner's known-tag set.
///
/// Every `*_hex` field is the sibling numeric field rendered as `0x` plus eight lowercase hex
/// digits.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SmpSaveUnclassifiedTaggedCollectionHeaderProbe {
/// `profile_family` copied from the container profile handed to the scanner.
pub profile_family: String,
/// Constant label written by the scanner (`"save-unclassified-tagged-header-counts"`).
pub source_kind: String,
/// Constant label written by the scanner
/// (`"scenario-save-unclassified-tagged-header-counts"`).
pub semantic_family: String,
/// Little-endian `u32` read at `metadata_tag_offset`; the scanner only keeps values
/// `<= 0xffff`.
pub metadata_tag: u32,
pub metadata_tag_hex: String,
/// Always `metadata_tag + 1`.
pub records_tag: u32,
pub records_tag_hex: String,
/// Always `metadata_tag + 2`.
pub close_tag: u32,
pub close_tag_hex: String,
/// Byte offset of the metadata tag within the scanned buffer.
pub metadata_tag_offset: usize,
/// Byte offset of the first `records_tag` match found after the metadata tag.
pub records_tag_offset: usize,
/// Byte offset of the first `close_tag` match found after the records tag.
pub close_tag_offset: usize,
/// Number of bytes between the end of the records tag and the start of the close tag
/// (`close_tag_offset - (records_tag_offset + 4)`); never zero for an emitted probe.
pub records_span_len: usize,
/// Header dword 0; the scanner only accepts `0` or `1`.
pub direct_collection_flag: u32,
pub direct_collection_flag_hex: String,
/// Header dword 1; the scanner only accepts `1..=0x4000`.
pub direct_record_stride: u32,
pub direct_record_stride_hex: String,
/// Header dword 4; the scanner only accepts `1..=0x100000`.
pub live_id_bound: u32,
pub live_id_bound_hex: String,
/// Header dword 5; the scanner only accepts `1..=live_id_bound`.
pub live_record_count: u32,
pub live_record_count_hex: String,
/// All header dwords read directly after the metadata tag, in file order.
pub header_words: Vec<u32>,
/// `header_words` rendered as hex strings, same order.
pub header_hex_words: Vec<String>,
/// Human-readable notes describing why the scanner reported this candidate.
pub evidence: Vec<String>,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SmpSaveTrainCollectionDirectoryEntryProbe {
pub live_entry_id: u32,
@ -2691,6 +2719,8 @@ pub struct SmpSaveCompanyChairmanAnalysisReport {
#[serde(default)]
pub placed_structure_record_triplets: Option<SmpSavePlacedStructureRecordTripletProbe>,
#[serde(default)]
pub unclassified_tagged_collection_headers: Vec<SmpSaveUnclassifiedTaggedCollectionHeaderProbe>,
#[serde(default)]
pub company_entries: Vec<SmpSaveCompanyRecordAnalysisEntry>,
#[serde(default)]
pub chairman_entries: Vec<SmpSaveChairmanRecordAnalysisEntry>,
@ -2957,6 +2987,9 @@ pub struct SmpInspectionReport {
pub save_placed_structure_record_triplet_probe:
Option<SmpSavePlacedStructureRecordTripletProbe>,
#[serde(default)]
pub save_unclassified_tagged_collection_header_probes:
Vec<SmpSaveUnclassifiedTaggedCollectionHeaderProbe>,
#[serde(default)]
pub save_company_roster_probe: Option<SmpLoadedCompanyRoster>,
#[serde(default)]
pub save_chairman_profile_table_probe: Option<SmpLoadedChairmanProfileTable>,
@ -3369,6 +3402,8 @@ pub fn inspect_save_company_and_chairman_analysis_bytes(
let region_record_triplets = report.save_region_record_triplet_probe.clone();
let placed_structure_record_triplets =
report.save_placed_structure_record_triplet_probe.clone();
let unclassified_tagged_collection_headers =
report.save_unclassified_tagged_collection_header_probes.clone();
let company_header_probe = report.save_company_collection_header_probe.as_ref();
let chairman_header_probe = report
.save_chairman_profile_collection_header_probe
@ -3726,6 +3761,18 @@ pub fn inspect_save_company_and_chairman_analysis_bytes(
triplets.entries.first().map(|entry| entry.profile_status_kind.as_str())
));
}
if let Some(candidate) = unclassified_tagged_collection_headers.first() {
notes.push(format!(
"Generic save-side tagged collection scan also found {} unclassified candidate families; largest current candidate uses tags {}/{}/{} with live_record_count={} stride=0x{:x} records_span_len=0x{:x}.",
unclassified_tagged_collection_headers.len(),
candidate.metadata_tag_hex,
candidate.records_tag_hex,
candidate.close_tag_hex,
candidate.live_record_count,
candidate.direct_record_stride,
candidate.records_span_len
));
}
if !company_entries.is_empty() {
notes.push(
"Company debt is derived from the grounded bond table at [company+0x5b/+0x5f] by summing live principal slots.".to_string(),
@ -3775,6 +3822,7 @@ pub fn inspect_save_company_and_chairman_analysis_bytes(
.save_placed_structure_collection_header_probe
.clone(),
placed_structure_record_triplets,
unclassified_tagged_collection_headers,
company_entries,
chairman_entries,
notes,
@ -7800,6 +7848,12 @@ fn inspect_bundle_bytes(bytes: &[u8], file_extension_hint: Option<String>) -> Sm
bytes,
save_placed_structure_collection_header_probe.as_ref(),
);
let save_unclassified_tagged_collection_header_probes =
scan_save_unclassified_tagged_collection_header_probes(
bytes,
file_extension_hint.as_deref(),
container_profile.as_ref(),
);
let save_company_roster_probe = parse_save_company_roster_probe(
bytes,
save_company_collection_header_probe.as_ref(),
@ -7969,6 +8023,7 @@ fn inspect_bundle_bytes(bytes: &[u8], file_extension_hint: Option<String>) -> Sm
save_region_record_triplet_probe,
save_placed_structure_collection_header_probe,
save_placed_structure_record_triplet_probe,
save_unclassified_tagged_collection_header_probes,
save_company_roster_probe,
save_chairman_profile_table_probe,
rt3_105_save_name_table_probe,
@ -10470,6 +10525,150 @@ fn parse_save_tagged_collection_header_probe(
})
}
/// Brute-force scan of a save bundle for indexed-collection style headers whose metadata tag
/// is not already claimed by one of the dedicated collection probes.
///
/// Returns an empty vector unless `file_extension_hint` is exactly `"gms"` and
/// `container_profile` names one of the recognised RT3 save container families.
///
/// Every byte offset is tried as a metadata tag position. A candidate is reported when:
/// - the little-endian `u32` at that offset is `<= 0xffff` and not in the known-tag set;
/// - the header dwords that follow look plausible: flag (word 0) is `0` or `1`, stride
///   (word 1) is in `1..=0x4000`, `live_id_bound` (word 4) is in `1..=0x100000`, and
///   `live_record_count` (word 5) is in `1..=live_id_bound`;
/// - `tag + 1` and then `tag + 2` both occur later in the buffer with at least one byte
///   between the end of the records tag and the start of the close tag.
///
/// The result is sorted by descending `live_record_count`, then ascending `metadata_tag`, then
/// ascending `metadata_tag_offset`, and truncated to the first 32 entries.
fn scan_save_unclassified_tagged_collection_header_probes(
    bytes: &[u8],
    file_extension_hint: Option<&str>,
    container_profile: Option<&SmpContainerProfile>,
) -> Vec<SmpSaveUnclassifiedTaggedCollectionHeaderProbe> {
    if file_extension_hint != Some("gms") {
        return Vec::new();
    }
    let Some(profile) = container_profile else {
        return Vec::new();
    };
    if !matches!(
        profile.profile_family.as_str(),
        "rt3-classic-save-container-v1"
            | "rt3-105-save-container-v1"
            | "rt3-105-scenario-save-container-v1"
            | "rt3-105-alt-save-container-v1"
    ) {
        return Vec::new();
    }

    // Metadata tags that already have a dedicated probe; the generic scan skips them.
    let known_metadata_tags = BTreeSet::from([
        RT3_SAVE_WORLD_BLOCK_CHUNK_TAG,
        0x000061a9,
        0x00005209,
        0x000036b1,
        EVENT_RUNTIME_COLLECTION_METADATA_TAG as u32,
    ]);

    // Stop early enough that a tag plus a serialized header still fits in the buffer.
    let scan_end = bytes
        .len()
        .saturating_sub(INDEXED_COLLECTION_SERIALIZED_HEADER_LEN + 4);

    let mut probes: Vec<SmpSaveUnclassifiedTaggedCollectionHeaderProbe> = Vec::new();
    for metadata_tag_offset in 0..scan_end {
        let Some(metadata_tag) = read_u32_at(bytes, metadata_tag_offset) else {
            continue;
        };
        if metadata_tag > 0xffff || known_metadata_tags.contains(&metadata_tag) {
            continue;
        }

        // Read the header dwords that sit directly behind the tag; `all` stops at the first
        // dword that cannot be read.
        let mut header_words = [0u32; INDEXED_COLLECTION_SERIALIZED_HEADER_DWORD_COUNT];
        let header_complete = header_words.iter_mut().enumerate().all(|(index, word)| {
            match read_u32_at(bytes, metadata_tag_offset + 4 + index * 4) {
                Some(value) => {
                    *word = value;
                    true
                }
                None => false,
            }
        });
        if !header_complete {
            continue;
        }

        let direct_collection_flag = header_words[0];
        let direct_record_stride = header_words[1];
        let live_id_bound = header_words[4];
        let live_record_count = header_words[5];
        let plausible_header = matches!(direct_collection_flag, 0 | 1)
            && (1..=0x4000).contains(&direct_record_stride)
            && (1..=0x100000).contains(&live_id_bound)
            && (1..=live_id_bound).contains(&live_record_count);
        if !plausible_header {
            continue;
        }

        // `metadata_tag <= 0xffff`, so neither addition can overflow.
        let records_tag = metadata_tag + 1;
        let close_tag = metadata_tag + 2;

        // NOTE(review): the records-tag search starts right behind the metadata tag, so it also
        // covers the header dwords that were just read — confirm whether it should start after
        // the header instead.
        let records_search_start = metadata_tag_offset + 4;
        let Some(records_relative_offset) =
            find_u32_le_offsets(&bytes[records_search_start..], records_tag)
                .into_iter()
                .next()
        else {
            continue;
        };
        let records_tag_offset = records_search_start + records_relative_offset;

        let close_search_start = records_tag_offset + 4;
        let Some(close_relative_offset) =
            find_u32_le_offsets(&bytes[close_search_start..], close_tag)
                .into_iter()
                .next()
        else {
            continue;
        };
        let close_tag_offset = close_search_start + close_relative_offset;

        let records_span_len = close_tag_offset.saturating_sub(records_tag_offset + 4);
        if records_span_len == 0 {
            continue;
        }

        // No duplicate check is needed here: every iteration has its own `metadata_tag_offset`
        // and pushes at most once, so two probes can never share the same offset triple.
        probes.push(SmpSaveUnclassifiedTaggedCollectionHeaderProbe {
            profile_family: profile.profile_family.clone(),
            source_kind: "save-unclassified-tagged-header-counts".to_string(),
            semantic_family: "scenario-save-unclassified-tagged-header-counts".to_string(),
            metadata_tag,
            metadata_tag_hex: format!("0x{metadata_tag:08x}"),
            records_tag,
            records_tag_hex: format!("0x{records_tag:08x}"),
            close_tag,
            close_tag_hex: format!("0x{close_tag:08x}"),
            metadata_tag_offset,
            records_tag_offset,
            close_tag_offset,
            records_span_len,
            direct_collection_flag,
            direct_collection_flag_hex: format!("0x{:08x}", direct_collection_flag),
            direct_record_stride,
            direct_record_stride_hex: format!("0x{:08x}", direct_record_stride),
            live_id_bound,
            live_id_bound_hex: format!("0x{:08x}", live_id_bound),
            live_record_count,
            live_record_count_hex: format!("0x{:08x}", live_record_count),
            header_words: header_words.to_vec(),
            header_hex_words: header_words
                .iter()
                .map(|word| format!("0x{word:08x}"))
                .collect(),
            evidence: vec![
                "generic save-side tagged collection scan over plausible low u32 metadata tags not yet claimed by the checked-in collection probes".to_string(),
                "candidate uses adjacent metadata/records/close tags with a header that matches the grounded indexed-collection shape (flag, stride, live_id_bound, live_record_count)".to_string(),
            ],
        });
    }

    // Largest collections first; tag and offset make the order fully deterministic.
    probes.sort_by(|left, right| {
        right
            .live_record_count
            .cmp(&left.live_record_count)
            .then_with(|| left.metadata_tag.cmp(&right.metadata_tag))
            .then_with(|| left.metadata_tag_offset.cmp(&right.metadata_tag_offset))
    });
    probes.truncate(32);
    probes
}
fn parse_save_len_prefixed_ascii_name(bytes: &[u8]) -> Option<String> {
let len = *bytes.first()? as usize;
let text_bytes = bytes.get(1..1 + len)?;
@ -18099,6 +18298,48 @@ mod tests {
assert_eq!(probe.live_record_count, 0x7ea);
}
/// Plants one synthetic `0x7001/0x7002/0x7003` tag family with a plausible header in an
/// otherwise zeroed buffer and checks that the generic scanner reports it with the planted
/// stride, bound, count, and records span.
#[test]
fn scans_unclassified_tagged_collection_header_probe_from_adjacent_low_tags() {
    let metadata_at = 0x40usize;
    let records_at = 0x140usize;
    let close_at = 0x1c0usize;

    let mut bytes = vec![0u8; 0x400];
    let mut put_u32 = |offset: usize, value: u32| {
        bytes[offset..offset + 4].copy_from_slice(&value.to_le_bytes());
    };

    put_u32(metadata_at, 0x00007001);
    put_u32(records_at, 0x00007002);
    put_u32(close_at, 0x00007003);

    // Header dwords behind the metadata tag: flag, stride, two fillers, bound, count, then
    // zero padding up to 19 words in total.
    let mut header_words = [0u32; 19];
    header_words[..6].copy_from_slice(&[0, 0x12, 0x0a, 0x14, 0x900, 0x808]);
    for (index, word) in header_words.iter().enumerate() {
        put_u32(metadata_at + 4 + index * 4, *word);
    }

    let profile = SmpContainerProfile {
        profile_family: "rt3-105-save-container-v1".to_string(),
        profile_evidence: vec![],
        is_known_profile: true,
    };
    let probes = scan_save_unclassified_tagged_collection_header_probes(
        &bytes,
        Some("gms"),
        Some(&profile),
    );

    let found = probes
        .iter()
        .find(|candidate| candidate.metadata_tag == 0x7001)
        .expect("should include synthetic unclassified tag family");
    assert_eq!(found.records_tag, 0x7002);
    assert_eq!(found.close_tag, 0x7003);
    assert_eq!(found.direct_record_stride, 0x12);
    assert_eq!(found.live_id_bound, 0x900);
    assert_eq!(found.live_record_count, 0x808);
    assert_eq!(found.records_span_len, close_at - (records_at + 4));
}
#[test]
fn parses_save_company_roster_probe_from_direct_records() {
let metadata_tag_offset = 0x40usize;

View file

@ -20,8 +20,9 @@ Working rule:
- Reconstruct the save-side placed-structure collection body on top of the newly grounded
`0x36b1/0x36b2/0x36b3` header seam so the blocked city-connection / linked-transit branch can
stop depending on atlas-only placed-structure and local-runtime refresh notes, especially the
semantics of the now-grounded compact `0x55f3` footer dword/status lane and any deeper side
buffers beyond the repeated `0x55f1/0x55f2/0x55f3` triplet envelope.
semantics of the now-grounded compact `0x55f3` footer dword/status lane and the newly exposed
unclassified tagged-collection candidates that may correspond to the separate placed-structure
dynamic side-buffer lane.
- Extend shellless clock advancement so more periodic-company service branches consume owned
runtime time state directly instead of only the explicit periodic service command.
- Keep widening selected-year world-owner state only when a full owning reader/rebuild family is
@ -76,6 +77,9 @@ Working rule:
padding beyond that embedded profile collection, so the remaining region blocker has shifted
from “find the hidden tail inside this payload” to “find the separate owner seam that backs the
runtime latches the city-connection branch still reads.”
- Save inspection now also exports a generic low-tag unclassified collection scan over plausible
indexed-collection headers, so the next city-connection pass can compare real save candidates
against the atlas-owned placed-structure dynamic side-buffer lane instead of blind tag hunting.
- The placed-structure tagged save stream now also exposes repeated `0x55f1/0x55f2/0x55f3`
triplets with dual name stems, a fixed five-`f32` policy row, and a compact `0x5dc1...0x5dc2`
footer carrying one raw `u32` payload lane plus one live `i32` status lane, so the remaining