Narrow engine type auxiliary stem frontier

This commit is contained in:
Jan Petykiewicz 2026-04-21 23:44:59 -07:00
commit a265251831
3 changed files with 112 additions and 3 deletions

View file

@ -172,6 +172,8 @@ pub struct EngineTypesInspectionReport {
pub car_auxiliary_stem_counts: BTreeMap<String, usize>,
pub car_auxiliary_stem_relation_counts: BTreeMap<String, usize>,
pub car_auxiliary_stem_distinct_pair_counts: BTreeMap<String, usize>,
pub car_auxiliary_stem_distinct_pattern_counts: BTreeMap<String, usize>,
pub car_auxiliary_stem_distinct_pair_family_stems: BTreeMap<String, Vec<String>>,
pub internal_ne_profile_pk4_match_count: usize,
pub internal_ne_profile_pk4_missing_count: usize,
pub locomotive_pair_internal_ne_profile_pk4_match_count: usize,
@ -439,6 +441,13 @@ pub fn inspect_engine_types_dir(
.iter()
.filter_map(distinct_car_auxiliary_stem_pair_label),
);
let car_auxiliary_stem_distinct_pattern_counts = count_owned_values(
family_entries
.iter()
.filter_map(classify_distinct_car_auxiliary_stem_pattern),
);
let car_auxiliary_stem_distinct_pair_family_stems =
build_distinct_car_auxiliary_stem_pair_family_stems(&family_entries);
let lco_companion_stem_counts = count_named_values(
family_entries
.iter()
@ -611,6 +620,8 @@ pub fn inspect_engine_types_dir(
car_auxiliary_stem_counts,
car_auxiliary_stem_relation_counts,
car_auxiliary_stem_distinct_pair_counts,
car_auxiliary_stem_distinct_pattern_counts,
car_auxiliary_stem_distinct_pair_family_stems,
internal_ne_profile_pk4_match_count,
internal_ne_profile_pk4_missing_count,
locomotive_pair_internal_ne_profile_pk4_match_count,
@ -1010,6 +1021,35 @@ fn distinct_car_auxiliary_stem_pair_label(family: &EngineTypeFamilyEntry) -> Opt
))
}
fn classify_distinct_car_auxiliary_stem_pattern(family: &EngineTypeFamilyEntry) -> Option<String> {
let pair_label = distinct_car_auxiliary_stem_pair_label(family)?;
let internal_root = strip_terminal_role_letter(family.internal_stem.as_deref()?)?;
let auxiliary_root = strip_terminal_role_letter(family.auxiliary_stem.as_deref()?)?;
if internal_root.eq_ignore_ascii_case(auxiliary_root) {
return Some("paired_opposite_role_suffix".to_string());
}
Some(format!("other_distinct_auxiliary_stem:{pair_label}"))
}
/// Groups family canonical stems by their distinct internal-to-auxiliary pair
/// label.
///
/// Families for which `distinct_car_auxiliary_stem_pair_label` returns `None`
/// are left out. Each pair label maps to the canonical stems of the families
/// that produced it, sorted ascending; the map itself is ordered by pair label.
fn build_distinct_car_auxiliary_stem_pair_family_stems(
    families: &[EngineTypeFamilyEntry],
) -> BTreeMap<String, Vec<String>> {
    let mut stems_by_pair: BTreeMap<String, Vec<String>> = BTreeMap::new();

    families
        .iter()
        .filter_map(|entry| {
            // Only clone the canonical stem once we know the family is kept.
            distinct_car_auxiliary_stem_pair_label(entry)
                .map(|label| (label, entry.canonical_stem.clone()))
        })
        .for_each(|(label, canonical)| {
            stems_by_pair.entry(label).or_default().push(canonical);
        });

    // Input order is arbitrary, so sort each bucket for a stable report.
    stems_by_pair.values_mut().for_each(|bucket| bucket.sort());

    stems_by_pair
}
fn strip_terminal_role_letter(value: &str) -> Option<&str> {
let last = value.chars().last()?;
matches!(last, 'L' | 'T' | 'l' | 't').then(|| {
@ -1421,6 +1461,58 @@ mod tests {
assert_eq!(ladders.get("55.000000 -> 85.000000"), Some(&1));
}
#[test]
fn classifies_distinct_auxiliary_stem_patterns() {
    // Same root with T/L role letters swapped between the two stems.
    let tender_to_loco = EngineTypeFamilyEntry {
        canonical_stem: "class_a1t".to_string(),
        internal_stem: Some("ClassA1T".to_string()),
        auxiliary_stem: Some("ClassA1L".to_string()),
        ..minimal_family_entry()
    };
    // Same role letter, but the roots differ once it is stripped.
    let reordered_alias = EngineTypeFamilyEntry {
        canonical_stem: "class_qjl".to_string(),
        internal_stem: Some("classqjl".to_string()),
        auxiliary_stem: Some("qjclassl".to_string()),
        ..minimal_family_entry()
    };

    let expectations = [
        (&tender_to_loco, "paired_opposite_role_suffix"),
        (
            &reordered_alias,
            "other_distinct_auxiliary_stem:classqjl -> qjclassl",
        ),
    ];
    for (family, expected_label) in expectations {
        assert_eq!(
            classify_distinct_car_auxiliary_stem_pattern(family),
            Some(expected_label.to_string())
        );
    }
}
#[test]
fn groups_distinct_auxiliary_stem_pairs_by_family() {
    // Local constructor so each fixture row reads as (canonical, internal, auxiliary).
    let family_with_stems = |canonical: &str, internal: &str, auxiliary: &str| {
        EngineTypeFamilyEntry {
            canonical_stem: canonical.to_string(),
            internal_stem: Some(internal.to_string()),
            auxiliary_stem: Some(auxiliary.to_string()),
            ..minimal_family_entry()
        }
    };
    let families = [
        family_with_stems("class_a1t", "ClassA1T", "ClassA1L"),
        family_with_stems("class_qjt", "classqjt", "qjclasst"),
    ];

    let stems_by_pair = build_distinct_car_auxiliary_stem_pair_family_stems(&families);

    let expected_groups = [
        ("ClassA1T -> ClassA1L", "class_a1t"),
        ("classqjt -> qjclasst", "class_qjt"),
    ];
    for (pair_label, canonical) in expected_groups {
        assert_eq!(
            stems_by_pair.get(pair_label),
            Some(&vec![canonical.to_string()])
        );
    }
}
#[test]
fn builds_lco_low_cardinality_lane_counts() {
let lane_counts = build_lco_low_cardinality_lane_counts(

View file

@ -12,16 +12,19 @@ This file is the short active queue for the current runtime and reverse-engineer
- The active static parser head is now the `engine_types` semantics frontier.
The repo now has structural inspectors for `.car`, `.lco`, `.cgo`, and `.cct`, but the binary side is still only partially semantic: the checked 1.05 corpus grounds `.car` fixed strings at `0x0c / 0x48 / 0x84` plus a second fixed stem slot at `0xa2` and a side-view resource name at `0xc0`, while `.lco` carries a stable primary stem at `0x04` and only conditional companion/body slots at `0x0c` and `0x12` when the leading stem slot is padded.
The checked 1.05 corpus now also splits `.car` auxiliary stems into `126` direct matches, `14` role-neutral roots, and only `5` truly distinct cases, with those five exact internal-to-auxiliary pairs now preserved directly in the report surface, while `.cgo` collapses into five stable scalar ladders instead of arbitrary floats.
The checked 1.05 corpus now also splits `.car` auxiliary stems into `126` direct matches, `14` role-neutral roots, and only `5` truly distinct cases, with those five exact internal-to-auxiliary pairs now preserved directly in the report surface.
That distinct cohort is narrower than it first looked: `3` of the `5` are just opposite-role tender-to-loco pairings (`ClassA1T -> ClassA1L`, `CramptonT -> CramptonL`, `WhaleT -> WhaleL`), and only the two `QJ` rows remain as reordered alias roots.
On the cargo side, `.cgo` now collapses into five stable scalar ladders instead of arbitrary floats.
The early `.lco` lane block is now partially partitioned too: only offsets `0x20`, `0x34`, `0x38`, `0x3c`, `0x44`, `0x48`, and `0x54` behave like low-cardinality buckets, while the other early lanes still look high-variance.
The side-view resource path is now grounded into `Data/2D/rt3_2IMB.PK4`, and the `.imb` parser now decodes shipped comment-suffixed numeric rows plus `_NE` profile fields such as `HorizontalScaleModifier` and `ImageWHScaled`.
The checked PK4 linkage split is now explicit too: `132 / 145` side-view resource names resolve directly in 1.05, but the remaining `13` are the missing `CarSideView_3.imb` cohort and that hole exists in both checked installs, while `43 / 145` derived `{internal_stem}_NE.imb` names resolve and all of those hits belong to matched locomotive pairs.
The parser now preserves that `CarSideView_3` miss cohort exactly, and it also preserves the tiny conditional `.lco` companion-profile seam directly: in 1.05 the padded `.lco` companion/body slots collapse to `Zephyr / (none) / CarSideView_3` for `242_a1_l`, `gp35l`, `u1l`, and `zephyrl`, and to `VL80T / Loco / CarSideView_{1,2}` for `be 5-7`, `f3 loco`, and `gp7`.
The classic install widens the same `0xc0` seam further: the side-view slot is not only `CarSideView_*` but also a larger unresolved `*_Profile.imb` family, and every checked classic `*_Profile.imb` reference currently misses the packaged `rt3_2IMB.PK4` surface too.
A whole-tree file census now rules out the simple fallback too: none of those `*_Profile.imb` names, and no `CarSideView_3.imb`, exist as loose files anywhere in the checked `rt3/` or `rt3_105/` trees.
The packaged profile metadata is stable enough to summarize: `CarSideView_1` is `512x512` at `0.04` VRAM, `CarSideView_2` is `512x256` at `0.02`, and every packaged `_NE` profile is `512x128` with `HorizontalScaleModifier = 0.75` and `MaxPercentOfInterfaceVRAM = 0.09`.
The `_NE` split is now aligned with the locomotive display census too: all `43` packaged `_NE` hits live inside the grounded display prefix, and all `5` unmatched display-tail families are still missing packaged `_NE` profiles.
The cargo side is partially linked now as well: the `.cgo` ladder families and `.cct` sidecar identifiers share the same cargo-family keys for ten checked families, with `Troop` left as the only `.cct`-only outlier.
The next honest static work is to keep promoting those fixed lanes into stable parser fields, explain the five remaining distinct auxiliary-stem cases, decide whether the classic `*_Profile.imb` side-view references are dead loose-file dependencies or a still-unmapped package family, and decide how far the `.cgo` ladders plus the low-cardinality `.lco` lanes can be grounded without overclaiming semantics. The latest corpus check did narrow one point already: the low-cardinality `.lco` lanes do not split cleanly on `_NE` presence, so that branch now wants binary/code correlation rather than more aggregate-only counting.
The next honest static work is to decide whether the two remaining reordered `QJ` auxiliary roots are just alternate content aliases or evidence of a narrower foreign-display/image root, decide whether the classic `*_Profile.imb` side-view references are dead loose-file dependencies or a still-unmapped package family, and decide how far the `.cgo` ladders plus the low-cardinality `.lco` lanes can be grounded without overclaiming semantics. The latest corpus check did narrow one point already: the low-cardinality `.lco` lanes do not split cleanly on `_NE` presence, so that branch now wants binary/code correlation rather than more aggregate-only counting.
Preserved checked parser detail now lives in [EngineTypes parser semantics](rehost-queue/engine-types-parser-semantics-2026-04-21.md).
Preserved checked format inventory detail now lives in [RT3 format inventory](rehost-queue/format-inventory-2026-04-21.md).

View file

@ -23,6 +23,14 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed.
- `WhaleT -> WhaleL`
- `classqjl -> qjclassl`
- `classqjt -> qjclasst`
- The parser now preserves one tighter split inside those five distinct cases:
- `paired_opposite_role_suffix`: `3`
- `ClassA1T -> ClassA1L` on `class_a1t`
- `CramptonT -> CramptonL` on `crampton tender`
- `WhaleT -> WhaleL` on `orca nx462 tender`
- reordered `QJ` aliases (reported under per-pair `other_distinct_auxiliary_stem:<pair>` keys): `2`
- `classqjl -> qjclassl` on `class_qjl`
- `classqjt -> qjclasst` on `class_qjt`
- `.lco` carries one always-present primary stem at `0x04`.
- `.lco` only carries meaningful secondary slots when that leading stem slot is padded:
- `0x0c`: conditional companion stem such as `VL80T` or `Zephyr`
@ -119,6 +127,10 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed.
- `TenWheeler_Profile.imb`
- `V2Class_Profile.imb`
- `Vittorio_Profile.imb`
- A whole-tree file census now rules out the simplest fallback for that family:
- none of those checked classic `*_Profile.imb` names exists as a loose file anywhere under the
checked `rt3/` or `rt3_105/` trees
- `CarSideView_3.imb` also does not exist as a loose file anywhere under those trees
- The packaged profile metadata is now bounded too:
- `CarSideView_1.imb`: `512x512`, `MaxPercentOfInterfaceVRAM = 0.04`
- `CarSideView_2.imb`: `512x256`, `MaxPercentOfInterfaceVRAM = 0.02`
@ -168,8 +180,10 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed.
## Remaining Static Questions
- `.car`
- what the `0xa2` auxiliary stem really represents in the five remaining distinct cases:
- what the `0xa2` auxiliary stem really represents in the two remaining reordered `QJ` cases:
alternate content root, paired tender/loco image root, or a narrower foreign-display alias
- whether the three opposite-role pairings are best described as direct tender-to-loco image
roots or some slightly broader paired-display fallback convention
- whether the trailing side-view resource can be tied cleanly to the PK4-backed `CarSideView_*`
metadata, the classic `*_Profile.imb` reference family, and the engine-specific `_NE.imb`
profiles without inventing frontend semantics