Classify zero-family stock alias clusters

This commit is contained in:
Jan Petykiewicz 2026-04-19 15:31:02 -07:00
commit b946e69bd0
4 changed files with 94 additions and 0 deletions

View file

@ -96,6 +96,7 @@ pub struct BuildingTypeRecoveredTableSummary {
pub nonzero_bty_header_name_0x40_summaries: Vec<BuildingTypeBtyHeaderNameSummary>, pub nonzero_bty_header_name_0x40_summaries: Vec<BuildingTypeBtyHeaderNameSummary>,
pub nonzero_bty_header_name_0x5e_summaries: Vec<BuildingTypeBtyHeaderNameSummary>, pub nonzero_bty_header_name_0x5e_summaries: Vec<BuildingTypeBtyHeaderNameSummary>,
pub nonzero_bty_header_name_0x7c_summaries: Vec<BuildingTypeBtyHeaderNameSummary>, pub nonzero_bty_header_name_0x7c_summaries: Vec<BuildingTypeBtyHeaderNameSummary>,
pub bty_header_name_0x5e_dword_summaries: Vec<BuildingTypeBtyHeaderNameDwordSummary>,
} }
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
@ -114,6 +115,16 @@ pub struct BuildingTypeBtyHeaderNameSummary {
pub sample_file_names: Vec<String>, pub sample_file_names: Vec<String>,
} }
/// One `(header name, dword_0xbb)` cluster of `.bty` header probes from a single header-name lane.
///
/// Rows of this shape are produced by `summarize_bty_header_name_lane_by_dword`, which groups
/// source files by the trimmed header name read from the lane together with the probe's
/// `dword_0xbb` value.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BuildingTypeBtyHeaderNameDwordSummary {
/// Header offset of the summarized lane, rendered as lowercase hex with a `0x` prefix
/// (for example `"0x5e"`).
pub header_offset_hex: String,
/// Whitespace-trimmed header name shared by every file in this cluster; never empty.
pub header_value: String,
/// The probe's `dword_0xbb` value shared by every file in this cluster.
pub dword_0xbb: u32,
/// `dword_0xbb` rendered as zero-padded eight-digit lowercase hex (for example `"0x000001f4"`).
pub dword_0xbb_hex: String,
/// Number of distinct file names in this cluster, counted before the sample list is capped.
pub file_count: usize,
/// Sorted, de-duplicated file names from this cluster, capped at the first 24 entries.
pub sample_file_names: Vec<String>,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BuildingTypeSourceReport { pub struct BuildingTypeSourceReport {
pub directory_path: String, pub directory_path: String,
@ -483,6 +494,8 @@ fn summarize_recovered_table_families(
summarize_nonzero_bty_header_name_lane(files, 0x5e, |probe| &probe.name_0x5e); summarize_nonzero_bty_header_name_lane(files, 0x5e, |probe| &probe.name_0x5e);
let nonzero_bty_header_name_0x7c_summaries = let nonzero_bty_header_name_0x7c_summaries =
summarize_nonzero_bty_header_name_lane(files, 0x7c, |probe| &probe.name_0x7c); summarize_nonzero_bty_header_name_lane(files, 0x7c, |probe| &probe.name_0x7c);
let bty_header_name_0x5e_dword_summaries =
summarize_bty_header_name_lane_by_dword(files, 0x5e, |probe| &probe.name_0x5e);
BuildingTypeRecoveredTableSummary { BuildingTypeRecoveredTableSummary {
recovered_style_themes: RECOVERED_STYLE_THEMES recovered_style_themes: RECOVERED_STYLE_THEMES
@ -500,6 +513,7 @@ fn summarize_recovered_table_families(
nonzero_bty_header_name_0x40_summaries, nonzero_bty_header_name_0x40_summaries,
nonzero_bty_header_name_0x5e_summaries, nonzero_bty_header_name_0x5e_summaries,
nonzero_bty_header_name_0x7c_summaries, nonzero_bty_header_name_0x7c_summaries,
bty_header_name_0x5e_dword_summaries,
} }
} }
@ -549,6 +563,52 @@ fn summarize_nonzero_bty_header_name_lane(
summaries summaries
} }
/// Clusters the files of one `.bty` header-name lane by `(header name, dword_0xbb)`.
///
/// `selector` picks the header-name lane out of each file's header probe and `offset` is the
/// lane's header offset, used only to label the resulting rows. Files without a header probe,
/// and files whose selected name is empty after trimming whitespace, are left out.
///
/// Each returned row carries the number of distinct file names in its cluster plus a sorted
/// sample of at most 24 of them. Rows are ordered by descending file count, then by ascending
/// `dword_0xbb`, then by header offset label, then by header name.
fn summarize_bty_header_name_lane_by_dword(
    files: &[BuildingTypeSourceFile],
    offset: u32,
    selector: impl Fn(&BuildingTypeBtyHeaderProbe) -> &String,
) -> Vec<BuildingTypeBtyHeaderNameDwordSummary> {
    // Bucket file names under their (trimmed header name, dword) key.
    let mut file_names_by_cluster: BTreeMap<(String, u32), Vec<String>> = BTreeMap::new();
    for file in files {
        if let Some(probe) = &file.bty_header_probe {
            let name = selector(probe).trim();
            if !name.is_empty() {
                file_names_by_cluster
                    .entry((name.to_string(), probe.dword_0xbb))
                    .or_default()
                    .push(file.file_name.clone());
            }
        }
    }

    // The lane label is identical for every row, so render it once.
    let offset_label = format!("0x{offset:02x}");

    let mut rows = Vec::with_capacity(file_names_by_cluster.len());
    for ((header_value, dword_0xbb), mut names) in file_names_by_cluster {
        names.sort();
        names.dedup();
        // Count distinct names before trimming the list down to the sample size.
        let file_count = names.len();
        names.truncate(24);
        rows.push(BuildingTypeBtyHeaderNameDwordSummary {
            header_offset_hex: offset_label.clone(),
            header_value,
            dword_0xbb,
            dword_0xbb_hex: format!("0x{dword_0xbb:08x}"),
            file_count,
            sample_file_names: names,
        });
    }

    // Lexicographic tuple comparison; `file_count` is swapped between the two sides so that
    // larger clusters sort first while every other key stays ascending.
    rows.sort_by(|a, b| {
        let a_key = (b.file_count, a.dword_0xbb, &a.header_offset_hex, &a.header_value);
        let b_key = (a.file_count, b.dword_0xbb, &b.header_offset_hex, &b.header_value);
        a_key.cmp(&b_key)
    });
    rows
}
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] #[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
struct BuildingBindingArtifact { struct BuildingBindingArtifact {
bindings: Vec<BuildingBindingRow>, bindings: Vec<BuildingBindingRow>,
@ -731,5 +791,16 @@ mod tests {
sample_file_names: vec!["Port.bty".to_string()], sample_file_names: vec!["Port.bty".to_string()],
}] }]
); );
assert_eq!(
summary.bty_header_name_0x5e_dword_summaries,
vec![BuildingTypeBtyHeaderNameDwordSummary {
header_offset_hex: "0x5e".to_string(),
header_value: "TextileMill".to_string(),
dword_0xbb: 0x01f4,
dword_0xbb_hex: "0x000001f4".to_string(),
file_count: 1,
sample_file_names: vec!["Port.bty".to_string()],
}]
);
} }
} }

View file

@ -96,6 +96,12 @@
`Distillery x2`, `Toolndie x2`). So the next load-side source-selection pass should bias toward `Distillery x2`, `Toolndie x2`). So the next load-side source-selection pass should bias toward
that `0x5e` alias-root lane when testing why the later chooser seeds only part of the stock that `0x5e` alias-root lane when testing why the later chooser seeds only part of the stock
family into the numbered Tier-2 bank. family into the numbered Tier-2 bank.
The same `name_0x5e` dword-family summary now also shows that the current residue is still
stock-side: `MunitionsFactory` belongs to a zero-valued `WeaponsFactory` alias cluster with
`Electric Plant`, `Fertilizer Factory`, `Nuclear Power Plant`, `Oil Well`, and
`Weapons Factory`. So the next load-side source-selection pass should treat the open question as
one of cluster choice inside the wider stock corpus, not as a jump from stock rows to some
unrelated non-stock family.
The fixed The fixed
tail is explicit now too: `0x00444dd0` writes one direct dword from tail is explicit now too: `0x00444dd0` writes one direct dword from
`[world+0x19]`, one zeroed `0x1f4`-byte slab under `0x32cf`, closes the package, derives the `[world+0x19]`, one zeroed `0x1f4`-byte slab under `0x32cf`, closes the package, derives the

View file

@ -1339,6 +1339,14 @@
stronger stock-family clue than the direct-name lanes, and keep the explicit non-overlap residue stronger stock-family clue than the direct-name lanes, and keep the explicit non-overlap residue
(`MunitionsFactory/MunitionsFactory x1`) separate instead of folding it into the recovered (`MunitionsFactory/MunitionsFactory x1`) separate instead of folding it into the recovered
industrial/commercial subset. industrial/commercial subset.
That non-overlap residue is grounded against the wider stock corpus now too. The same checked-in
`name_0x5e` dword-family summary shows `MunitionsFactory` sits in a zero-valued
`WeaponsFactory` alias cluster (`Electric Plant`, `Fertilizer Factory`, `Munitions Factory`,
`Nuclear Power Plant`, `Oil Well`, `Weapons Factory`) rather than outside stock assets
entirely. So the remaining Tier-2 chooser/source-selection frontier is no longer “stock vs
non-stock”; it is which stock alias-root cluster is selected and why later clone/replay paths
prefer the nonzero `0x000001f4` cluster while the peer-site residue can still surface a
zero-family `WeaponsFactory`-side root.
The direct `+0xba/+0xbb` writer census now rules out a broad false lead too. The obvious new The direct `+0xba/+0xbb` writer census now rules out a broad false lead too. The obvious new
stores at `0x004ecd42/0x004ecdaa` and `0x004ed5d5/0x004ed625` are only shell-side stores at `0x004ecd42/0x004ecdaa` and `0x004ed5d5/0x004ed625` are only shell-side
portrait/string refresh helpers over a different id-keyed collection rooted through portrait/string refresh helpers over a different id-keyed collection rooted through

View file

@ -1285,6 +1285,15 @@ Working rule:
chooser-side/source-selection slice can focus on whether that residue belongs to a zero-valued chooser-side/source-selection slice can focus on whether that residue belongs to a zero-valued
stock-header family or to a later live projection seam rather than treating the whole nonzero stock-header family or to a later live projection seam rather than treating the whole nonzero
post-secondary set as one undifferentiated mystery post-secondary set as one undifferentiated mystery
- that broader stock-header check is now grounded too: the checked-in `name_0x5e` dword-family
summary shows `MunitionsFactory` is not outside stock assets at all. It sits in a zero-valued
`WeaponsFactory` alias cluster (`Electric Plant`, `Fertilizer Factory`, `Munitions Factory`,
`Nuclear Power Plant`, `Oil Well`, `Weapons Factory`) while the recovered nonzero family keeps
its own `TextileMill`, `LumberMill`, `MeatPackingPlant`, `Distillery`, and `Toolndie`
clusters. So the next Tier-2 source-selection question is no longer “stock vs non-stock”; it
is which stock alias-root cluster gets selected, and why some later clone/replay paths prefer
the nonzero `0x000001f4` cluster while the peer-site residue can still surface a zero-family
`WeaponsFactory`-side root
- keep the already-grounded `0x0047fd50` class gate separate from that byte: direct disassembly - keep the already-grounded `0x0047fd50` class gate separate from that byte: direct disassembly
now says `0x0047fd50` resolves the linked peer through `[site+0x04]`, reads candidate class now says `0x0047fd50` resolves the linked peer through `[site+0x04]`, reads candidate class
byte `[candidate+0x8c]`, and returns true only for `0/1/2` while rejecting `3/4` and above, byte `[candidate+0x8c]`, and returns true only for `0/1/2` while rejecting `3/4` and above,