Narrow tier2 stock selector to machine shop outlier

This commit is contained in:
Jan Petykiewicz 2026-04-19 15:38:14 -07:00
commit cf6d36b8e7
4 changed files with 176 additions and 5 deletions

View file

@ -97,6 +97,7 @@ pub struct BuildingTypeRecoveredTableSummary {
pub nonzero_bty_header_name_0x5e_summaries: Vec<BuildingTypeBtyHeaderNameSummary>,
pub nonzero_bty_header_name_0x7c_summaries: Vec<BuildingTypeBtyHeaderNameSummary>,
pub bty_header_name_0x5e_dword_summaries: Vec<BuildingTypeBtyHeaderNameDwordSummary>,
pub bty_name_0x5e_bca_selector_summaries: Vec<BuildingTypeBtyNameBcaSelectorSummary>,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
@ -125,6 +126,18 @@ pub struct BuildingTypeBtyHeaderNameDwordSummary {
pub sample_file_names: Vec<String>,
}
/// One group in the joined `.bty` header-name / `.bca` selector summary.
///
/// Rows are produced by `summarize_bty_name_0x5e_bca_selector_patterns`, which groups
/// entries that have both a probed `.bty` file and a probed `.bca` file by the tuple
/// (`header_value`, `dword_0xbb`, `byte_0xba_hex`, `byte_0xbb_hex`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BuildingTypeBtyNameBcaSelectorSummary {
/// Offset label of the header name lane; the producer always writes `"0x5e"`.
pub header_offset_hex: String,
/// Whitespace-trimmed `name_0x5e` string from the `.bty` header probe (never empty).
pub header_value: String,
/// `dword_0xbb` value copied from the `.bty` header probe.
pub dword_0xbb: u32,
/// `dword_0xbb` rendered as `0x` followed by eight lowercase hex digits.
pub dword_0xbb_hex: String,
/// `byte_0xba_hex` string copied from the paired `.bca` selector probe.
pub byte_0xba_hex: String,
/// `byte_0xbb_hex` string copied from the paired `.bca` selector probe.
pub byte_0xbb_hex: String,
/// Number of distinct `.bty` file names that fell into this group.
pub file_count: usize,
/// Sorted, de-duplicated `.bty` file names for this group, capped at the first 24.
pub sample_file_names: Vec<String>,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BuildingTypeSourceReport {
pub directory_path: String,
@ -496,6 +509,8 @@ fn summarize_recovered_table_families(
summarize_nonzero_bty_header_name_lane(files, 0x7c, |probe| &probe.name_0x7c);
let bty_header_name_0x5e_dword_summaries =
summarize_bty_header_name_lane_by_dword(files, 0x5e, |probe| &probe.name_0x5e);
let bty_name_0x5e_bca_selector_summaries =
summarize_bty_name_0x5e_bca_selector_patterns(entries, files);
BuildingTypeRecoveredTableSummary {
recovered_style_themes: RECOVERED_STYLE_THEMES
@ -514,6 +529,7 @@ fn summarize_recovered_table_families(
nonzero_bty_header_name_0x5e_summaries,
nonzero_bty_header_name_0x7c_summaries,
bty_header_name_0x5e_dword_summaries,
bty_name_0x5e_bca_selector_summaries,
}
}
@ -609,6 +625,81 @@ fn summarize_bty_header_name_lane_by_dword(
summaries
}
/// Joins each entry's `.bty` header probe with its `.bca` selector probe and groups the pairs.
///
/// For every entry, the first listed file of kind `Bty` and the first of kind `Bca` (resolved
/// by file name against `files`) are paired. Entries are skipped when either file is missing,
/// when either probe is absent, or when the trimmed `name_0x5e` string is empty.
///
/// The surviving pairs are grouped by (`name_0x5e`, `dword_0xbb`, `byte_0xba_hex`,
/// `byte_0xbb_hex`). Each group reports its distinct `.bty` file names (count plus up to 24
/// sorted samples). Groups are returned ordered by descending file count, then ascending
/// dword, header value, `byte_0xba_hex`, and `byte_0xbb_hex`.
fn summarize_bty_name_0x5e_bca_selector_patterns(
    entries: &[BuildingTypeSourceEntry],
    files: &[BuildingTypeSourceFile],
) -> Vec<BuildingTypeBtyNameBcaSelectorSummary> {
    // Name -> file index; a later file with the same name replaces an earlier one.
    let mut lookup = BTreeMap::new();
    for file in files {
        lookup.insert(file.file_name.as_str(), file);
    }

    let mut buckets: BTreeMap<(String, u32, String, String), Vec<String>> = BTreeMap::new();
    for entry in entries {
        // One pass over the entry's names, remembering the first file of each kind.
        let mut first_bty: Option<&BuildingTypeSourceFile> = None;
        let mut first_bca: Option<&BuildingTypeSourceFile> = None;
        for name in &entry.file_names {
            let Some(&candidate) = lookup.get(name.as_str()) else {
                continue;
            };
            match candidate.source_kind {
                BuildingTypeSourceKind::Bty if first_bty.is_none() => first_bty = Some(candidate),
                BuildingTypeSourceKind::Bca if first_bca.is_none() => first_bca = Some(candidate),
                _ => {}
            }
        }

        let (Some(bty), Some(bca)) = (first_bty, first_bca) else {
            continue;
        };
        let Some(header) = bty.bty_header_probe.as_ref() else {
            continue;
        };
        let Some(selector) = bca.bca_selector_probe.as_ref() else {
            continue;
        };

        let alias = header.name_0x5e.trim();
        if alias.is_empty() {
            continue;
        }

        let key = (
            alias.to_string(),
            header.dword_0xbb,
            selector.byte_0xba_hex.clone(),
            selector.byte_0xbb_hex.clone(),
        );
        buckets.entry(key).or_default().push(bty.file_name.clone());
    }

    let mut summaries = Vec::with_capacity(buckets.len());
    for ((header_value, dword_0xbb, byte_0xba_hex, byte_0xbb_hex), mut names) in buckets {
        names.sort();
        names.dedup();
        // Count the distinct names before trimming the sample list.
        let file_count = names.len();
        names.truncate(24);
        summaries.push(BuildingTypeBtyNameBcaSelectorSummary {
            header_offset_hex: "0x5e".to_string(),
            header_value,
            dword_0xbb,
            dword_0xbb_hex: format!("0x{dword_0xbb:08x}"),
            byte_0xba_hex,
            byte_0xbb_hex,
            file_count,
            sample_file_names: names,
        });
    }

    // Largest groups first; ties fall back to ascending lexicographic order of the group key
    // fields (dword, header value, then the two selector byte strings).
    summaries.sort_by(|a, b| {
        b.file_count.cmp(&a.file_count).then_with(|| {
            (
                a.dword_0xbb,
                &a.header_value,
                &a.byte_0xba_hex,
                &a.byte_0xbb_hex,
            )
                .cmp(&(
                    b.dword_0xbb,
                    &b.header_value,
                    &b.byte_0xba_hex,
                    &b.byte_0xbb_hex,
                ))
        })
    });
    summaries
}
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
struct BuildingBindingArtifact {
bindings: Vec<BuildingBindingRow>,
@ -697,6 +788,12 @@ mod tests {
source_kinds: vec![BuildingTypeSourceKind::Bty],
file_names: vec!["ServiceTower.bty".to_string()],
},
BuildingTypeSourceEntry {
canonical_stem: canonicalize_building_stem("Port"),
raw_stems: vec!["Port".to_string()],
source_kinds: vec![BuildingTypeSourceKind::Bca, BuildingTypeSourceKind::Bty],
file_names: vec!["Port.bca".to_string(), "Port.bty".to_string()],
},
];
let files = vec![
BuildingTypeSourceFile {
@ -726,12 +823,21 @@ mod tests {
}),
},
BuildingTypeSourceFile {
file_name: "Warehouse.bca".to_string(),
raw_stem: "Warehouse".to_string(),
canonical_stem: canonicalize_building_stem("Warehouse"),
file_name: "Port.bca".to_string(),
raw_stem: "Port".to_string(),
canonical_stem: canonicalize_building_stem("Port"),
source_kind: BuildingTypeSourceKind::Bca,
byte_len: None,
bca_selector_probe: None,
bca_selector_probe: Some(BuildingTypeBcaSelectorProbe {
byte_0xb8: 0x00,
byte_0xb8_hex: "0x00".to_string(),
byte_0xb9: 0x00,
byte_0xb9_hex: "0x00".to_string(),
byte_0xba: 0x00,
byte_0xba_hex: "0x00".to_string(),
byte_0xbb: 0x00,
byte_0xbb_hex: "0x00".to_string(),
}),
bty_header_probe: None,
},
];
@ -753,7 +859,7 @@ mod tests {
);
assert_eq!(
summary.bare_port_warehouse_files,
vec!["Port.bty".to_string(), "Warehouse.bca".to_string()]
vec!["Port.bca".to_string(), "Port.bty".to_string()]
);
assert_eq!(summary.nonzero_bty_header_dword_summaries.len(), 1);
assert_eq!(
@ -802,5 +908,18 @@ mod tests {
sample_file_names: vec!["Port.bty".to_string()],
}]
);
assert_eq!(
summary.bty_name_0x5e_bca_selector_summaries,
vec![BuildingTypeBtyNameBcaSelectorSummary {
header_offset_hex: "0x5e".to_string(),
header_value: "TextileMill".to_string(),
dword_0xbb: 0x01f4,
dword_0xbb_hex: "0x000001f4".to_string(),
byte_0xba_hex: "0x00".to_string(),
byte_0xbb_hex: "0x00".to_string(),
file_count: 1,
sample_file_names: vec!["Port.bty".to_string()],
}]
);
}
}

View file

@ -102,6 +102,16 @@
Factory`. So the next load-side source-selection pass should treat the open question as one of
cluster choice inside the wider stock corpus, not as a jump from stock rows to some unrelated
non-stock family.
The checked-in cluster-to-selector join sharpens that again: every grounded `name_0x5e` alias
cluster is zero-selector by default, including both the nonzero `0x000001f4` industrial subset
and the zero-family `WeaponsFactory` subset. The only surfaced nonzero joined outlier is
`MachineShop` inside the `TextileMill` cluster (`byte_0xba = 0x3f`, `byte_0xbb = 0x00`). So the
next load-side source-selection pass should focus on that row-level outlier and any matching
replay/seed logic, not on a whole-cluster nonzero bank hypothesis.
The global stock selector report tightens that further: the full `MachineShop.bca` signature
(`0x00/0x80/0x3f/0x00` across `0xb8..0xbb`) is unique across the checked-in stock `.bca`
corpus. So the remaining load-side Tier-2 frontier is one surfaced stock-file outlier plus the
later clone/replay logic that amplifies it, not a hidden wider stock-family selector strip.
The fixed
tail is explicit now too: `0x00444dd0` writes one direct dword from
`[world+0x19]`, one zeroed `0x1f4`-byte slab under `0x32cf`, closes the package, derives the

View file

@ -1347,6 +1347,20 @@
non-stock”; it is which stock alias-root cluster is selected and why later clone/replay paths
prefer the nonzero `0x000001f4` cluster while the peer-site residue can still surface a
zero-family `WeaponsFactory`-side root.
The stock-cluster-to-selector join is explicit now too. The checked-in `name_0x5e` + `.bca`
selector summary shows every grounded alias cluster is zero-selector by default, including the
nonzero `0x000001f4` clusters (`TextileMill x9`, `LumberMill x4`, `MeatPackingPlant x4`,
`Distillery x2`, `Toolndie x2`) and the zero-family `WeaponsFactory x6` cluster. The only
surfaced nonzero joined outlier is `MachineShop` inside the nonzero `TextileMill` cluster
(`byte_0xba = 0x3f`, `byte_0xbb = 0x00`). So the next Tier-2 source-selection pass should no
longer ask whether whole alias clusters map to nonzero bank bytes; it should ask why one
specific stock row inside the `TextileMill` cluster surfaces a nonzero selector while its peer
rows stay zero.
The global stock `.bca` selector report narrows that again: the exact `MachineShop.bca`
signature (`byte_0xb8 = 0x00`, `byte_0xb9 = 0x80`, `byte_0xba = 0x3f`, `byte_0xbb = 0x00`) is
unique across the checked-in stock corpus. So the remaining Tier-2 source frontier is not a
broad hidden family of nonzero stock rows; it is one surfaced stock-file outlier plus whatever
later clone/replay logic amplifies it into the numbered banked rows.
The direct `+0xba/+0xbb` writer census now rules out a broad false lead too. The obvious new
stores at `0x004ecd42/0x004ecdaa` and `0x004ed5d5/0x004ed625` are only shell-side
portrait/string refresh helpers over a different id-keyed collection rooted through
@ -1362,6 +1376,13 @@
naming branch from cloned bit `[candidate+0xba]`. So the unresolved Tier-2 seam is no longer
“find any direct writer to candidate `+0xba/+0xbb`”; it is “find the earlier seed or projection
owner that first makes some source/live rows reach that clone path with nonzero bank bytes.”
The top-level stock handoff above that clone pass is tighter now too. Direct disassembly of
`0x004196c0` shows the broader stock `*.bca` rebuild loop formatting the wildcard path rooted at
`0x005c93fc`, iterating the local `0x005c8190/0x005c8194/0x005c819c` find-first/find-next
strip, calling the per-file stock loader `0x00414490` for each hit, and only then
tail-calling `0x00419230`. So the remaining Tier-2 source problem is increasingly “which stock
rows that rebuild admits or seeds with nonzero bank bytes” rather than “which unrelated later
service invokes the banked clone pass.”
The stock owner chain above those parser fields is explicit now too:
`0x00438c8e -> 0x004131f0 -> 0x00412fb0 -> 0x004120b0` constructs root `0x0062b268`, and the
adjacent `.rdata` strings at `0x005c93f4..0x005c940e` prove that `0x00412fb0` is the

View file

@ -861,6 +861,13 @@ Working rule:
`Warehouse%02d` banks are hidden station-style aliases; it is why the later clone path prefers
this narrower `0x000001f4` stock family over the zero-valued station and
maintenance/service families when it seeds those numbered banks.
The stock rebuild handoff above that seed question is tighter now too. Direct disassembly of
`0x004196c0` shows the broader stock `*.bca` rebuild loop formatting the wildcard path rooted
at `0x005c93fc`, iterating the file enumerator through the `0x005c8190/0x005c8194/0x005c819c`
find-first/find-next strip, calling the per-file stock loader `0x00414490` for each hit, and
only then tail-calling `0x00419230`. So the remaining Tier-2 source problem is increasingly
“which stock rows that rebuild admits or seeds with nonzero bank bytes” rather than “which
unrelated later scheduler invokes the banked clone pass.”
The direct `+0xba/+0xbb` writer census is narrower now too. The obvious newly surfaced stores
at `0x004ecd42/0x004ecdaa` and `0x004ed5d5/0x004ed625` are only shell-side portrait/string
refresh helpers: they walk a separate id-keyed collection through `0x0053f830`, free and
@ -1294,6 +1301,20 @@ Working rule:
is which stock alias-root cluster gets selected, and why some later clone/replay paths prefer
the nonzero `0x000001f4` cluster while the peer-site residue can still surface a zero-family
`WeaponsFactory`-side root
- the stock-cluster-to-selector join is explicit now too: the checked-in `name_0x5e` +
`.bca` selector summary shows every grounded alias cluster is zero-selector by default,
including the nonzero `0x000001f4` clusters (`TextileMill x9`, `LumberMill x4`,
`MeatPackingPlant x4`, `Distillery x2`, `Toolndie x2`) and the zero-family
`WeaponsFactory x6` cluster. The only surfaced nonzero joined outlier is
`MachineShop` inside the nonzero `TextileMill` cluster (`byte_0xba = 0x3f`, `byte_0xbb = 0x00`).
So the next Tier-2 source-selection pass should no longer ask whether whole alias clusters map
to nonzero bank bytes; it should ask why one specific stock row inside the `TextileMill`
cluster surfaces a nonzero selector while its peer rows stay zero
- the global stock `.bca` selector report narrows that one step further still: the exact
`MachineShop.bca` signature (`byte_0xb8 = 0x00`, `byte_0xb9 = 0x80`, `byte_0xba = 0x3f`,
`byte_0xbb = 0x00`) is unique across the checked-in stock corpus. So the current Tier-2
frontier is not a broad hidden family of nonzero stock rows; it is a single surfaced stock-file
outlier plus whatever later clone/replay logic amplifies it into the numbered banked rows
- keep the already-grounded `0x0047fd50` class gate separate from that byte: direct disassembly
now says `0x0047fd50` resolves the linked peer through `[site+0x04]`, reads candidate class
byte `[candidate+0x8c]`, and returns true only for `0/1/2` while rejecting `3/4` and above,