Summarize stock alias roots and peer residues

This commit is contained in:
Jan Petykiewicz 2026-04-19 15:28:23 -07:00
commit 9f603ac28e
5 changed files with 261 additions and 51 deletions

View file

@ -93,6 +93,9 @@ pub struct BuildingTypeRecoveredTableSummary {
pub present_standalone_entries: Vec<String>,
pub bare_port_warehouse_files: Vec<String>,
pub nonzero_bty_header_dword_summaries: Vec<BuildingTypeBtyHeaderDwordSummary>,
pub nonzero_bty_header_name_0x40_summaries: Vec<BuildingTypeBtyHeaderNameSummary>,
pub nonzero_bty_header_name_0x5e_summaries: Vec<BuildingTypeBtyHeaderNameSummary>,
pub nonzero_bty_header_name_0x7c_summaries: Vec<BuildingTypeBtyHeaderNameSummary>,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
@ -103,6 +106,14 @@ pub struct BuildingTypeBtyHeaderDwordSummary {
pub sample_file_names: Vec<String>,
}
/// One group of stock `.bty` files that share the same name string in a single header name lane.
///
/// Produced by `summarize_nonzero_bty_header_name_lane`, which only counts files whose header
/// probe has a nonzero `dword_0xbb` and a non-empty (trimmed) name in the selected lane.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BuildingTypeBtyHeaderNameSummary {
/// Header offset of the name lane, rendered as `0x`-prefixed lowercase hex (for example `0x5e`).
pub header_offset_hex: String,
/// Whitespace-trimmed name string shared by every file in this group.
pub header_value: String,
/// Number of distinct file names in the group, counted before the sample list is capped.
pub file_count: usize,
/// Sorted, de-duplicated file names from the group, capped at 24 entries.
pub sample_file_names: Vec<String>,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BuildingTypeSourceReport {
pub directory_path: String,
@ -329,7 +340,8 @@ fn probe_bty_header(bytes: &[u8]) -> BuildingTypeBtyHeaderProbe {
}
fn read_u32_le(bytes: &[u8], offset: usize) -> u32 {
bytes.get(offset..offset + 4)
bytes
.get(offset..offset + 4)
.and_then(|slice| <[u8; 4]>::try_from(slice).ok())
.map(u32::from_le_bytes)
.unwrap_or(0)
@ -339,7 +351,10 @@ fn read_c_string(bytes: &[u8], offset: usize, max_len: usize) -> String {
let Some(slice) = bytes.get(offset..offset.saturating_add(max_len)) else {
return String::new();
};
let end = slice.iter().position(|byte| *byte == 0).unwrap_or(slice.len());
let end = slice
.iter()
.position(|byte| *byte == 0)
.unwrap_or(slice.len());
String::from_utf8_lossy(&slice[..end]).into_owned()
}
@ -386,14 +401,8 @@ fn summarize_recovered_table_families(
entries: &[BuildingTypeSourceEntry],
files: &[BuildingTypeSourceFile],
) -> BuildingTypeRecoveredTableSummary {
const RECOVERED_STYLE_THEMES: [&str; 6] = [
"Victorian",
"Tudor",
"SoWest",
"Persian",
"Kyoto",
"ClpBrd",
];
const RECOVERED_STYLE_THEMES: [&str; 6] =
["Victorian", "Tudor", "SoWest", "Persian", "Kyoto", "ClpBrd"];
const RECOVERED_SOURCE_KINDS: [&str; 5] = [
"StationSml",
"StationMed",
@ -435,12 +444,7 @@ fn summarize_recovered_table_families(
let mut bare_port_warehouse_files = files
.iter()
.filter(|file| {
matches!(
file.canonical_stem.as_str(),
"port" | "warehouse"
)
})
.filter(|file| matches!(file.canonical_stem.as_str(), "port" | "warehouse"))
.map(|file| file.file_name.clone())
.collect::<Vec<_>>();
bare_port_warehouse_files.sort();
@ -473,6 +477,13 @@ fn summarize_recovered_table_families(
})
.collect();
let nonzero_bty_header_name_0x40_summaries =
summarize_nonzero_bty_header_name_lane(files, 0x40, |probe| &probe.name_0x40);
let nonzero_bty_header_name_0x5e_summaries =
summarize_nonzero_bty_header_name_lane(files, 0x5e, |probe| &probe.name_0x5e);
let nonzero_bty_header_name_0x7c_summaries =
summarize_nonzero_bty_header_name_lane(files, 0x7c, |probe| &probe.name_0x7c);
BuildingTypeRecoveredTableSummary {
recovered_style_themes: RECOVERED_STYLE_THEMES
.into_iter()
@ -486,9 +497,58 @@ fn summarize_recovered_table_families(
present_standalone_entries,
bare_port_warehouse_files,
nonzero_bty_header_dword_summaries,
nonzero_bty_header_name_0x40_summaries,
nonzero_bty_header_name_0x5e_summaries,
nonzero_bty_header_name_0x7c_summaries,
}
}
/// Groups stock `.bty` files by the name string found in one header name lane.
///
/// A file contributes only when it carries a header probe whose `dword_0xbb` is nonzero and
/// whose lane value, as picked by `selector`, is non-empty after trimming. `offset` is used
/// solely to label every summary with a `0x`-prefixed lowercase hex string.
///
/// Each summary reports the number of distinct file names in its group and at most 24 of them
/// in sorted order. Summaries are ordered by descending file count, then by offset label, then
/// by header value.
fn summarize_nonzero_bty_header_name_lane(
    files: &[BuildingTypeSourceFile],
    offset: u32,
    selector: impl Fn(&BuildingTypeBtyHeaderProbe) -> &String,
) -> Vec<BuildingTypeBtyHeaderNameSummary> {
    // Pair every qualifying file with its trimmed lane name; everything else is dropped here.
    let named_files = files.iter().filter_map(|file| {
        let probe = file.bty_header_probe.as_ref()?;
        if probe.dword_0xbb == 0 {
            return None;
        }
        let lane_name = selector(probe).trim();
        (!lane_name.is_empty()).then(|| (lane_name.to_string(), file.file_name.clone()))
    });

    let mut files_by_name = BTreeMap::<String, Vec<String>>::new();
    for (lane_name, file_name) in named_files {
        files_by_name.entry(lane_name).or_default().push(file_name);
    }

    // The label is identical for every group, so it is rendered once and cloned per summary.
    let offset_label = format!("0x{offset:02x}");
    let mut summaries = Vec::with_capacity(files_by_name.len());
    for (header_value, mut file_names) in files_by_name {
        file_names.sort();
        file_names.dedup();
        // Record the full distinct count before the sample list is cut down.
        let file_count = file_names.len();
        file_names.truncate(24);
        summaries.push(BuildingTypeBtyHeaderNameSummary {
            header_offset_hex: offset_label.clone(),
            header_value,
            file_count,
            sample_file_names: file_names,
        });
    }

    // Largest groups first; ties fall back to offset label and then header value, ascending.
    summaries.sort_by(|left, right| {
        right.file_count.cmp(&left.file_count).then_with(|| {
            (&left.header_offset_hex, &left.header_value)
                .cmp(&(&right.header_offset_hex, &right.header_value))
        })
    });
    summaries
}
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
struct BuildingBindingArtifact {
bindings: Vec<BuildingBindingRow>,
@ -617,12 +677,16 @@ mod tests {
];
let summary = summarize_recovered_table_families(&entries, &files);
assert!(summary
.present_style_station_entries
.contains(&"VictorianStationSml".to_string()));
assert!(summary
.present_style_station_entries
.contains(&"ClpbrdStationLrg".to_string()));
assert!(
summary
.present_style_station_entries
.contains(&"VictorianStationSml".to_string())
);
assert!(
summary
.present_style_station_entries
.contains(&"ClpbrdStationLrg".to_string())
);
assert_eq!(
summary.present_standalone_entries,
vec!["Maintenance".to_string(), "ServiceTower".to_string()]
@ -640,5 +704,32 @@ mod tests {
summary.nonzero_bty_header_dword_summaries[0].sample_file_names,
vec!["Port.bty".to_string()]
);
assert_eq!(
summary.nonzero_bty_header_name_0x40_summaries,
vec![BuildingTypeBtyHeaderNameSummary {
header_offset_hex: "0x40".to_string(),
header_value: "Port".to_string(),
file_count: 1,
sample_file_names: vec!["Port.bty".to_string()],
}]
);
assert_eq!(
summary.nonzero_bty_header_name_0x5e_summaries,
vec![BuildingTypeBtyHeaderNameSummary {
header_offset_hex: "0x5e".to_string(),
header_value: "TextileMill".to_string(),
file_count: 1,
sample_file_names: vec!["Port.bty".to_string()],
}]
);
assert_eq!(
summary.nonzero_bty_header_name_0x7c_summaries,
vec![BuildingTypeBtyHeaderNameSummary {
header_offset_hex: "0x7c".to_string(),
header_value: "Port".to_string(),
file_count: 1,
sample_file_names: vec!["Port.bty".to_string()],
}]
);
}
}

View file

@ -3780,6 +3780,14 @@ pub struct SmpSavePlacedStructureNonzeroCompanionBuildingFamilyOverlapSummaryEnt
pub secondary_matches_nonzero_stock_building_header_family: bool,
}
/// One saved nonzero-companion name pair that lies outside the nonzero stock building header
/// family: neither its primary nor its secondary name canonicalizes into that alias set.
///
/// Every field is copied unchanged from the corresponding
/// `SmpSavePlacedStructureNonzeroCompanionNamePairSummaryEntry`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SmpSavePlacedStructureNonzeroCompanionBuildingFamilyResidueSummaryEntry {
/// Companion byte string carried over from the source name-pair entry.
pub companion_byte_hex: String,
/// Primary name carried over from the source name-pair entry.
pub primary_name: String,
/// Secondary name carried over from the source name-pair entry.
pub secondary_name: String,
/// Count carried over from the source name-pair entry.
pub count: usize,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SmpSavePlacedStructurePolicyTrailingWordSummaryEntry {
pub policy_trailing_word_hex: String,
@ -3831,6 +3839,9 @@ pub struct SmpPeriodicCompanyServiceTraceReport {
pub peer_site_selector_candidate_saved_nonzero_companion_building_family_overlap_summaries:
Vec<SmpSavePlacedStructureNonzeroCompanionBuildingFamilyOverlapSummaryEntry>,
#[serde(default)]
pub peer_site_selector_candidate_saved_nonzero_companion_building_family_residue_summaries:
Vec<SmpSavePlacedStructureNonzeroCompanionBuildingFamilyResidueSummaryEntry>,
#[serde(default)]
pub peer_site_persisted_selector_bundle_fields: Vec<String>,
#[serde(default)]
pub peer_site_rebuilt_transient_followon_fields: Vec<String>,
@ -4809,35 +4820,7 @@ fn summarize_peer_site_selector_candidate_saved_nonzero_companion_name_pairs(
fn summarize_peer_site_selector_candidate_saved_nonzero_companion_building_family_overlaps(
summaries: &[SmpSavePlacedStructureNonzeroCompanionNamePairSummaryEntry],
) -> Vec<SmpSavePlacedStructureNonzeroCompanionBuildingFamilyOverlapSummaryEntry> {
let nonzero_stock_family_aliases = [
"Brewery",
"ConcretePlant",
"ConstructionFirm",
"DairyProcessor",
"Distillery",
"ElectronicsPlant",
"Furnace",
"FurnitureFactory",
"Hospital",
"Lumbermill",
"MachineShop",
"MeatPackingPlant",
"Museum",
"PaperMill",
"PharmaceuticalPlant",
"Port",
"Recycling Plant",
"Steel Mill",
"Textile Mill",
"TextileMill",
"Tire Factory",
"Tool and Die",
"Toolndie",
"Warehouse",
]
.into_iter()
.map(canonicalize_building_like_name)
.collect::<BTreeSet<_>>();
let nonzero_stock_family_aliases = nonzero_stock_building_header_family_aliases();
let mut overlaps = summaries
.iter()
.filter_map(|entry| {
@ -4872,6 +4855,71 @@ fn summarize_peer_site_selector_candidate_saved_nonzero_companion_building_famil
overlaps
}
/// Collects the saved nonzero-companion name pairs that fall outside the nonzero stock
/// building header family.
///
/// A pair is kept only when neither its primary nor its secondary name, after
/// `canonicalize_building_like_name`, is a member of the set returned by
/// `nonzero_stock_building_header_family_aliases`. Kept pairs are ordered by descending count,
/// then by companion byte, primary name and secondary name, and only the first 10 are returned.
fn summarize_peer_site_selector_candidate_saved_nonzero_companion_building_family_residues(
    summaries: &[SmpSavePlacedStructureNonzeroCompanionNamePairSummaryEntry],
) -> Vec<SmpSavePlacedStructureNonzeroCompanionBuildingFamilyResidueSummaryEntry> {
    let stock_aliases = nonzero_stock_building_header_family_aliases();
    let in_stock_family =
        |name: &str| stock_aliases.contains(&canonicalize_building_like_name(name));

    let mut residues = Vec::new();
    for pair in summaries {
        // Either name landing in the stock family disqualifies the whole pair.
        if in_stock_family(pair.primary_name.as_str())
            || in_stock_family(pair.secondary_name.as_str())
        {
            continue;
        }
        residues.push(
            SmpSavePlacedStructureNonzeroCompanionBuildingFamilyResidueSummaryEntry {
                companion_byte_hex: pair.companion_byte_hex.clone(),
                primary_name: pair.primary_name.clone(),
                secondary_name: pair.secondary_name.clone(),
                count: pair.count,
            },
        );
    }

    // Highest counts first; ties are broken lexicographically on the three string fields.
    residues.sort_by(|left, right| {
        right.count.cmp(&left.count).then_with(|| {
            (
                &left.companion_byte_hex,
                &left.primary_name,
                &left.secondary_name,
            )
                .cmp(&(
                    &right.companion_byte_hex,
                    &right.primary_name,
                    &right.secondary_name,
                ))
        })
    });
    residues.truncate(10);
    residues
}
fn nonzero_stock_building_header_family_aliases() -> BTreeSet<String> {
[
"Brewery",
"ConcretePlant",
"ConstructionFirm",
"DairyProcessor",
"Distillery",
"ElectronicsPlant",
"Furnace",
"FurnitureFactory",
"Hospital",
"Lumbermill",
"MachineShop",
"MeatPackingPlant",
"Museum",
"PaperMill",
"PharmaceuticalPlant",
"Port",
"Recycling Plant",
"Steel Mill",
"Textile Mill",
"TextileMill",
"Tire Factory",
"Tool and Die",
"Toolndie",
"Warehouse",
]
.into_iter()
.map(canonicalize_building_like_name)
.collect()
}
fn canonicalize_building_like_name(name: &str) -> String {
name.chars()
.filter(|ch| !matches!(ch, ' ' | '_' | '-'))
@ -4948,6 +4996,10 @@ fn build_periodic_company_service_trace_report(
summarize_peer_site_selector_candidate_saved_nonzero_companion_building_family_overlaps(
&peer_site_selector_candidate_saved_nonzero_companion_name_pair_summaries,
);
let peer_site_selector_candidate_saved_nonzero_companion_building_family_residue_summaries =
summarize_peer_site_selector_candidate_saved_nonzero_companion_building_family_residues(
&peer_site_selector_candidate_saved_nonzero_companion_name_pair_summaries,
);
let peer_site_persisted_selector_bundle_fields = vec![
"0x5dc1 payload lane [owner+0x23e] restored by 0x0045c150 and later fed into 0x0045c36e"
.to_string(),
@ -5744,6 +5796,20 @@ fn build_periodic_company_service_trace_report(
dominant_overlap.count
));
}
if !peer_site_selector_candidate_saved_nonzero_companion_building_family_residue_summaries
.is_empty()
{
let dominant_residue =
&peer_site_selector_candidate_saved_nonzero_companion_building_family_residue_summaries
[0];
notes.push(format!(
"The same trace now keeps the explicit non-overlap residue visible too: the leading saved pair still outside that recovered nonzero stock `.bty` family is {} / {} with byte {} x{}. That keeps the current Tier-2/source-selection queue honest: part of the peer-site nonzero residue now maps cleanly onto the recovered 0x000001f4 industrial/commercial family, but the remaining residue still needs a broader stock-header or later chooser-side explanation rather than being silently folded into the overlap set.",
dominant_residue.primary_name,
dominant_residue.secondary_name,
dominant_residue.companion_byte_hex,
dominant_residue.count
));
}
notes.push(
"Direct disassembly now also separates the narrower peer-class gate from that payload residue: 0x0047fd50 resolves the linked peer through [site+0x04], reads candidate class byte [candidate+0x8c], and returns true only for values 0/1/2 while rejecting 3/4 and above. That means the newly isolated post-secondary byte is not the already-grounded station-or-transit class gate itself; it remains a separate saved discriminator above the restored name-pair payload.".to_string(),
);
@ -5794,6 +5860,7 @@ fn build_periodic_company_service_trace_report(
peer_site_selector_candidate_saved_policy_trailing_word_summaries,
peer_site_selector_candidate_saved_nonzero_companion_name_pair_summaries,
peer_site_selector_candidate_saved_nonzero_companion_building_family_overlap_summaries,
peer_site_selector_candidate_saved_nonzero_companion_building_family_residue_summaries,
peer_site_persisted_selector_bundle_fields,
peer_site_rebuilt_transient_followon_fields,
peer_site_shellless_minimum_persisted_identity_status,
@ -32778,6 +32845,32 @@ mod tests {
assert!(overlaps[1].secondary_matches_nonzero_stock_building_header_family);
}
/// A pair whose names belong to the nonzero stock family must be dropped, and the pair outside
/// that family must come back with every field intact, including its count.
#[test]
fn summarizes_nonzero_companion_building_family_residues() {
    let name_pairs = [
        SmpSavePlacedStructureNonzeroCompanionNamePairSummaryEntry {
            companion_byte_hex: "0x01".to_string(),
            primary_name: "TextileMill".to_string(),
            secondary_name: "TextileMill".to_string(),
            count: 9,
        },
        SmpSavePlacedStructureNonzeroCompanionNamePairSummaryEntry {
            companion_byte_hex: "0x01".to_string(),
            primary_name: "MunitionsFactory".to_string(),
            secondary_name: "MunitionsFactory".to_string(),
            count: 1,
        },
    ];

    let residues =
        summarize_peer_site_selector_candidate_saved_nonzero_companion_building_family_residues(
            &name_pairs,
        );

    // Comparing whole entries also pins `count`, which field-by-field checks used to skip.
    assert_eq!(
        residues,
        vec![
            SmpSavePlacedStructureNonzeroCompanionBuildingFamilyResidueSummaryEntry {
                companion_byte_hex: "0x01".to_string(),
                primary_name: "MunitionsFactory".to_string(),
                secondary_name: "MunitionsFactory".to_string(),
                count: 1,
            }
        ]
    );
}
#[test]
fn builds_infrastructure_asset_trace_report_with_alias_disproved_status() {
let mut analysis = empty_analysis_report();

View file

@ -90,6 +90,12 @@
`ConcretePlant`, `ConstructionFirm`, `Hospital`, `Museum`, `PaperMill`, and `Steel Mill`. So
the later numbered clone seam is now bounded above that narrower `0x000001f4` stock family
rather than above the full style/source strip.
The checked-in header-name summaries sharpen that source split again: inside the nonzero family,
`name_0x40` / `name_0x7c` mostly stay on direct display/file roots, but `name_0x5e` clusters
the shared alias roots (`TextileMill x10`, `LumberMill x4`, `MeatPackingPlant x4`,
`Distillery x2`, `Toolndie x2`). So the next load-side source-selection pass should bias toward
that `0x5e` alias-root lane when testing why the later chooser seeds only part of the stock
family into the numbered Tier-2 bank.
The fixed
tail is explicit now too: `0x00444dd0` writes one direct dword from
`[world+0x19]`, one zeroed `0x1f4`-byte slab under `0x32cf`, closes the package, derives the

View file

@ -1331,6 +1331,14 @@
current residue outside the recovered `0x000001f4` stock header family. So the acquisition-side
post-secondary-byte question and the Tier-2 numbered-bank question now share one narrower
industrial/commercial subset frontier instead of two unrelated broad families.
The stock header lanes within that family are narrower now too: `name_0x40` / `name_0x7c`
mostly stay on direct file/display roots (`Warehouse x7` plus singletons like `Brewery`,
`Port`, and `Toolndie`), while `name_0x5e` is the real clustered alias-root lane
(`TextileMill x10`, `LumberMill x4`, `MeatPackingPlant x4`, `Distillery x2`, `Toolndie x2`).
So the next Tier-2 chooser/source-selection pass should treat `0x5e`-style alias roots as the
stronger stock-family clue than the direct-name lanes, and keep the explicit non-overlap residue
(`MunitionsFactory/MunitionsFactory x1`) separate instead of folding it into the recovered
industrial/commercial subset.
The direct `+0xba/+0xbb` writer census now rules out a broad false lead too. The obvious new
stores at `0x004ecd42/0x004ecdaa` and `0x004ed5d5/0x004ed625` are only shell-side
portrait/string refresh helpers over a different id-keyed collection rooted through

View file

@ -1273,6 +1273,18 @@ Working rule:
is `TextileMill`, `Toolndie`, `Brewery`, and `MeatPackingPlant` while
`MunitionsFactory` remains the clear current residue outside the recovered
`0x000001f4` stock header family
- the checked-in building-source summary now says which stock header lane is actually carrying
the shared alias roots too: within that recovered nonzero `0x000001f4` family,
`name_0x40` / `name_0x7c` mostly stay on direct file/display roots (`Warehouse x7` plus
singletons such as `Brewery`, `Port`, and `Toolndie`), while `name_0x5e` is the real
clustered alias-root lane (`TextileMill x10`, `LumberMill x4`, `MeatPackingPlant x4`,
`Distillery x2`, `Toolndie x2`). So the next Tier-2 source-selection pass should treat
`0x5e`-style alias roots as the stronger stock-family clue than the direct-name lanes
- the trace now keeps the explicit non-overlap residue first-class too: the current list outside
that recovered nonzero family is just `MunitionsFactory/MunitionsFactory x1`, so the next
chooser-side/source-selection slice can focus on whether that residue belongs to a zero-valued
stock-header family or to a later live projection seam rather than treating the whole nonzero
post-secondary set as one undifferentiated mystery
- keep the already-grounded `0x0047fd50` class gate separate from that byte: direct disassembly
now says `0x0047fd50` resolves the linked peer through `[site+0x04]`, reads candidate class
byte `[candidate+0x8c]`, and returns true only for `0/1/2` while rejecting `3/4` and above,