diff --git a/crates/rrt-runtime/src/inspect/engine_types.rs b/crates/rrt-runtime/src/inspect/engine_types.rs index c5d555b..4b8ca9c 100644 --- a/crates/rrt-runtime/src/inspect/engine_types.rs +++ b/crates/rrt-runtime/src/inspect/engine_types.rs @@ -172,6 +172,8 @@ pub struct EngineTypesInspectionReport { pub car_auxiliary_stem_counts: BTreeMap, pub car_auxiliary_stem_relation_counts: BTreeMap, pub car_auxiliary_stem_distinct_pair_counts: BTreeMap, + pub car_auxiliary_stem_distinct_pattern_counts: BTreeMap, + pub car_auxiliary_stem_distinct_pair_family_stems: BTreeMap>, pub internal_ne_profile_pk4_match_count: usize, pub internal_ne_profile_pk4_missing_count: usize, pub locomotive_pair_internal_ne_profile_pk4_match_count: usize, @@ -439,6 +441,13 @@ pub fn inspect_engine_types_dir( .iter() .filter_map(distinct_car_auxiliary_stem_pair_label), ); + let car_auxiliary_stem_distinct_pattern_counts = count_owned_values( + family_entries + .iter() + .filter_map(classify_distinct_car_auxiliary_stem_pattern), + ); + let car_auxiliary_stem_distinct_pair_family_stems = + build_distinct_car_auxiliary_stem_pair_family_stems(&family_entries); let lco_companion_stem_counts = count_named_values( family_entries .iter() @@ -611,6 +620,8 @@ pub fn inspect_engine_types_dir( car_auxiliary_stem_counts, car_auxiliary_stem_relation_counts, car_auxiliary_stem_distinct_pair_counts, + car_auxiliary_stem_distinct_pattern_counts, + car_auxiliary_stem_distinct_pair_family_stems, internal_ne_profile_pk4_match_count, internal_ne_profile_pk4_missing_count, locomotive_pair_internal_ne_profile_pk4_match_count, @@ -1010,6 +1021,35 @@ fn distinct_car_auxiliary_stem_pair_label(family: &EngineTypeFamilyEntry) -> Opt )) } +fn classify_distinct_car_auxiliary_stem_pattern(family: &EngineTypeFamilyEntry) -> Option { + let pair_label = distinct_car_auxiliary_stem_pair_label(family)?; + let internal_root = strip_terminal_role_letter(family.internal_stem.as_deref()?)?; + let auxiliary_root = 
strip_terminal_role_letter(family.auxiliary_stem.as_deref()?)?; + if internal_root.eq_ignore_ascii_case(auxiliary_root) { + return Some("paired_opposite_role_suffix".to_string()); + } + Some(format!("other_distinct_auxiliary_stem:{pair_label}")) +} + +fn build_distinct_car_auxiliary_stem_pair_family_stems( + families: &[EngineTypeFamilyEntry], +) -> BTreeMap> { + let mut grouped = BTreeMap::>::new(); + for family in families { + let Some(pair_label) = distinct_car_auxiliary_stem_pair_label(family) else { + continue; + }; + grouped + .entry(pair_label) + .or_default() + .push(family.canonical_stem.clone()); + } + for stems in grouped.values_mut() { + stems.sort(); + } + grouped +} + fn strip_terminal_role_letter(value: &str) -> Option<&str> { let last = value.chars().last()?; matches!(last, 'L' | 'T' | 'l' | 't').then(|| { @@ -1421,6 +1461,58 @@ mod tests { assert_eq!(ladders.get("55.000000 -> 85.000000"), Some(&1)); } + #[test] + fn classifies_distinct_auxiliary_stem_patterns() { + let paired = EngineTypeFamilyEntry { + canonical_stem: "class_a1t".to_string(), + internal_stem: Some("ClassA1T".to_string()), + auxiliary_stem: Some("ClassA1L".to_string()), + ..minimal_family_entry() + }; + let reordered = EngineTypeFamilyEntry { + canonical_stem: "class_qjl".to_string(), + internal_stem: Some("classqjl".to_string()), + auxiliary_stem: Some("qjclassl".to_string()), + ..minimal_family_entry() + }; + + assert_eq!( + classify_distinct_car_auxiliary_stem_pattern(&paired), + Some("paired_opposite_role_suffix".to_string()) + ); + assert_eq!( + classify_distinct_car_auxiliary_stem_pattern(&reordered), + Some("other_distinct_auxiliary_stem:classqjl -> qjclassl".to_string()) + ); + } + + #[test] + fn groups_distinct_auxiliary_stem_pairs_by_family() { + let grouped = build_distinct_car_auxiliary_stem_pair_family_stems(&[ + EngineTypeFamilyEntry { + canonical_stem: "class_a1t".to_string(), + internal_stem: Some("ClassA1T".to_string()), + auxiliary_stem: 
Some("ClassA1L".to_string()), + ..minimal_family_entry() + }, + EngineTypeFamilyEntry { + canonical_stem: "class_qjt".to_string(), + internal_stem: Some("classqjt".to_string()), + auxiliary_stem: Some("qjclasst".to_string()), + ..minimal_family_entry() + }, + ]); + + assert_eq!( + grouped.get("ClassA1T -> ClassA1L"), + Some(&vec!["class_a1t".to_string()]) + ); + assert_eq!( + grouped.get("classqjt -> qjclasst"), + Some(&vec!["class_qjt".to_string()]) + ); + } + #[test] fn builds_lco_low_cardinality_lane_counts() { let lane_counts = build_lco_low_cardinality_lane_counts( diff --git a/docs/rehost-queue.md b/docs/rehost-queue.md index c361792..3eede80 100644 --- a/docs/rehost-queue.md +++ b/docs/rehost-queue.md @@ -12,16 +12,19 @@ This file is the short active queue for the current runtime and reverse-engineer - The active static parser head is now the `engine_types` semantics frontier. The repo now has structural inspectors for `.car`, `.lco`, `.cgo`, and `.cct`, but the binary side is still only partially semantic: the checked 1.05 corpus grounds `.car` fixed strings at `0x0c / 0x48 / 0x84` plus a second fixed stem slot at `0xa2` and a side-view resource name at `0xc0`, while `.lco` carries a stable primary stem at `0x04` and only conditional companion/body slots at `0x0c` and `0x12` when the leading stem slot is padded. - The checked 1.05 corpus now also splits `.car` auxiliary stems into `126` direct matches, `14` role-neutral roots, and only `5` truly distinct cases, with those five exact internal-to-auxiliary pairs now preserved directly in the report surface, while `.cgo` collapses into five stable scalar ladders instead of arbitrary floats. + The checked 1.05 corpus now also splits `.car` auxiliary stems into `126` direct matches, `14` role-neutral roots, and only `5` truly distinct cases, with those five exact internal-to-auxiliary pairs now preserved directly in the report surface. 
+ That distinct cohort is narrower than it first looked: `3` of the `5` are just opposite-role tender-to-loco pairings (`ClassA1T -> ClassA1L`, `CramptonT -> CramptonL`, `WhaleT -> WhaleL`), and only the two `QJ` rows remain as reordered alias roots. + On the cargo side, `.cgo` now collapses into five stable scalar ladders instead of arbitrary floats. The early `.lco` lane block is now partially partitioned too: only offsets `0x20`, `0x34`, `0x38`, `0x3c`, `0x44`, `0x48`, and `0x54` behave like low-cardinality buckets, while the other early lanes still look high-variance. The side-view resource path is now grounded into `Data/2D/rt3_2IMB.PK4`, and the `.imb` parser now decodes shipped comment-suffixed numeric rows plus `_NE` profile fields such as `HorizontalScaleModifier` and `ImageWHScaled`. The checked PK4 linkage split is now explicit too: `132 / 145` side-view resource names resolve directly in 1.05, but the remaining `13` are the missing `CarSideView_3.imb` cohort and that hole exists in both checked installs, while `43 / 145` derived `{internal_stem}_NE.imb` names resolve and all of those hits belong to matched locomotive pairs. The parser now preserves that `CarSideView_3` miss cohort exactly, and it also preserves the tiny conditional `.lco` companion-profile seam directly: in 1.05 the padded `.lco` companion/body slots collapse to `Zephyr / (none) / CarSideView_3` for `242_a1_l`, `gp35l`, `u1l`, and `zephyrl`, and to `VL80T / Loco / CarSideView_{1,2}` for `be 5-7`, `f3 loco`, and `gp7`. The classic install widens the same `0xc0` seam further: the side-view slot is not only `CarSideView_*` but also a larger unresolved `*_Profile.imb` family, and every checked classic `*_Profile.imb` reference currently misses the packaged `rt3_2IMB.PK4` surface too. + A whole-tree file census now rules out the simple fallback too: none of those `*_Profile.imb` names, and no `CarSideView_3.imb`, exist as loose files anywhere in the checked `rt3/` or `rt3_105/` trees. 
The packaged profile metadata is stable enough to summarize: `CarSideView_1` is `512x512` at `0.04` VRAM, `CarSideView_2` is `512x256` at `0.02`, and every packaged `_NE` profile is `512x128` with `HorizontalScaleModifier = 0.75` and `MaxPercentOfInterfaceVRAM = 0.09`. The `_NE` split is now aligned with the locomotive display census too: all `43` packaged `_NE` hits live inside the grounded display prefix, and all `5` unmatched display-tail families are still missing packaged `_NE` profiles. The cargo side is partially linked now as well: the `.cgo` ladder families and `.cct` sidecar identifiers share the same cargo-family keys for ten checked families, with `Troop` left as the only `.cct`-only outlier. - The next honest static work is to keep promoting those fixed lanes into stable parser fields, explain the five remaining distinct auxiliary-stem cases, decide whether the classic `*_Profile.imb` side-view references are dead loose-file dependencies or a still-unmapped package family, and decide how far the `.cgo` ladders plus the low-cardinality `.lco` lanes can be grounded without overclaiming semantics. The latest corpus check did narrow one point already: the low-cardinality `.lco` lanes do not split cleanly on `_NE` presence, so that branch now wants binary/code correlation rather than more aggregate-only counting. + The next honest static work is to decide whether the two remaining reordered `QJ` auxiliary roots are just alternate content aliases or evidence of a narrower foreign-display/image root, decide whether the classic `*_Profile.imb` side-view references are dead loose-file dependencies or a still-unmapped package family, and decide how far the `.cgo` ladders plus the low-cardinality `.lco` lanes can be grounded without overclaiming semantics. 
The latest corpus check did narrow one point already: the low-cardinality `.lco` lanes do not split cleanly on `_NE` presence, so that branch now wants binary/code correlation rather than more aggregate-only counting. Preserved checked parser detail now lives in [EngineTypes parser semantics](rehost-queue/engine-types-parser-semantics-2026-04-21.md). Preserved checked format inventory detail now lives in [RT3 format inventory](rehost-queue/format-inventory-2026-04-21.md). diff --git a/docs/rehost-queue/engine-types-parser-semantics-2026-04-21.md b/docs/rehost-queue/engine-types-parser-semantics-2026-04-21.md index da811fa..fa03bfe 100644 --- a/docs/rehost-queue/engine-types-parser-semantics-2026-04-21.md +++ b/docs/rehost-queue/engine-types-parser-semantics-2026-04-21.md @@ -23,6 +23,14 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed. - `WhaleT -> WhaleL` - `classqjl -> qjclassl` - `classqjt -> qjclasst` +- The parser now preserves one tighter split inside those five distinct cases: + - `paired_opposite_role_suffix`: `3` + - `ClassA1T -> ClassA1L` on `class_a1t` + - `CramptonT -> CramptonL` on `crampton tender` + - `WhaleT -> WhaleL` on `orca nx462 tender` + - reordered `QJ` aliases: `2` (each reported under its own `other_distinct_auxiliary_stem:<pair>` key, count `1`) + - `classqjl -> qjclassl` on `class_qjl` + - `classqjt -> qjclasst` on `class_qjt` - `.lco` carries one always-present primary stem at `0x04`. - `.lco` only carries meaningful secondary slots when that leading stem slot is padded: - `0x0c`: conditional companion stem such as `VL80T` or `Zephyr` @@ -119,6 +127,10 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed. 
- `TenWheeler_Profile.imb` - `V2Class_Profile.imb` - `Vittorio_Profile.imb` +- A whole-tree file census now rules out the simplest fallback for that family: + - none of those checked classic `*_Profile.imb` names exists as a loose file anywhere under the + checked `rt3/` or `rt3_105/` trees + - `CarSideView_3.imb` also does not exist as a loose file anywhere under those trees - The packaged profile metadata is now bounded too: - `CarSideView_1.imb`: `512x512`, `MaxPercentOfInterfaceVRAM = 0.04` - `CarSideView_2.imb`: `512x256`, `MaxPercentOfInterfaceVRAM = 0.02` @@ -168,8 +180,10 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed. ## Remaining Static Questions - `.car` - - what the `0xa2` auxiliary stem really represents in the five remaining distinct cases: + - what the `0xa2` auxiliary stem really represents in the two remaining reordered `QJ` cases: alternate content root, paired tender/loco image root, or a narrower foreign-display alias + - whether the three opposite-role pairings are best described as direct tender-to-loco image + roots or some slightly broader paired-display fallback convention - whether the trailing side-view resource can be tied cleanly to the PK4-backed `CarSideView_*` metadata, the classic `*_Profile.imb` reference family, and the engine-specific `_NE.imb` profiles without inventing frontend semantics