diff --git a/crates/rrt-runtime/src/inspect/engine_types.rs b/crates/rrt-runtime/src/inspect/engine_types.rs index e5fb8f8..1b74a93 100644 --- a/crates/rrt-runtime/src/inspect/engine_types.rs +++ b/crates/rrt-runtime/src/inspect/engine_types.rs @@ -146,9 +146,11 @@ pub struct EngineTypesInspectionReport { pub unmatched_cct_file_count: usize, pub car_side_view_resource_counts: BTreeMap, pub car_auxiliary_stem_counts: BTreeMap, + pub car_auxiliary_stem_relation_counts: BTreeMap, pub lco_companion_stem_counts: BTreeMap, pub lco_body_type_label_counts: BTreeMap, pub cgo_scalar_value_counts: BTreeMap, + pub cgo_scalar_ladder_counts: BTreeMap, pub cgo_scalar_values_by_content_stem: BTreeMap>, pub cct_identifier_counts: BTreeMap, pub cct_value_counts: BTreeMap, @@ -356,6 +358,11 @@ pub fn inspect_engine_types_dir( .iter() .filter_map(|family| family.auxiliary_stem.as_deref()), ); + let car_auxiliary_stem_relation_counts = count_owned_values( + family_entries + .iter() + .filter_map(classify_car_auxiliary_stem_relation), + ); let lco_companion_stem_counts = count_named_values( family_entries .iter() @@ -373,6 +380,8 @@ pub fn inspect_engine_types_dir( ); let cgo_scalar_values_by_content_stem = build_cgo_scalar_values_by_content_stem(cgo_reports.values()); + let cgo_scalar_ladder_counts = + build_cgo_scalar_ladder_counts(cgo_scalar_values_by_content_stem.values()); let cct_identifier_counts = count_named_values( family_entries .iter() @@ -428,9 +437,11 @@ pub fn inspect_engine_types_dir( .count(), car_side_view_resource_counts, car_auxiliary_stem_counts, + car_auxiliary_stem_relation_counts, lco_companion_stem_counts, lco_body_type_label_counts, cgo_scalar_value_counts, + cgo_scalar_ladder_counts, cgo_scalar_values_by_content_stem, cct_identifier_counts, cct_value_counts, @@ -674,10 +685,34 @@ fn count_owned_values(values: impl Iterator) -> BTreeMap Option { + let auxiliary_stem = family.auxiliary_stem.as_deref()?; + let internal_stem = family.internal_stem.as_deref()?; + if auxiliary_stem == internal_stem { + return Some("matches_internal_stem".to_string()); + } + let internal_without_role_suffix = strip_terminal_role_letter(internal_stem)?; + if auxiliary_stem == internal_without_role_suffix { + return Some("matches_internal_without_role_suffix".to_string()); + } + if auxiliary_stem.eq_ignore_ascii_case(internal_without_role_suffix) { + return Some("matches_internal_without_role_suffix_casefolded".to_string()); + } + Some("distinct_auxiliary_stem".to_string()) +} + +fn strip_terminal_role_letter(value: &str) -> Option<&str> { + let last = value.chars().last()?; + matches!(last, 'L' | 'T' | 'l' | 't').then(|| { + let cutoff = value.len() - last.len_utf8(); + &value[..cutoff] + }) +} + fn build_cgo_scalar_values_by_content_stem<'a>( reports: impl Iterator, ) -> BTreeMap> { - let mut grouped = BTreeMap::>::new(); + let mut grouped = BTreeMap::>::new(); for report in reports { let Some(content_stem) = report.content_stem.as_ref() else { continue; @@ -688,13 +723,28 @@ fn build_cgo_scalar_values_by_content_stem<'a>( grouped .entry(content_stem.clone()) .or_default() - .push(format!("{leading_f32:.6}")); - } - for values in grouped.values_mut() { - values.sort(); - values.dedup(); + .push(leading_f32); } grouped + .into_iter() + .map(|(content_stem, mut values)| { + values.sort_by(f32::total_cmp); + values.dedup(); + ( + content_stem, + values + .into_iter() + .map(|value| format!("{value:.6}")) + .collect::>(), + ) + }) + .collect() +} + +fn build_cgo_scalar_ladder_counts<'a>( + ladders: impl Iterator>, +) -> BTreeMap { + count_owned_values(ladders.map(|ladder| ladder.join(" -> "))) } #[cfg(test)] @@ -824,9 +874,8 @@ mod tests { #[test] fn counts_directory_level_slot_values() { - let counts = count_named_values( - ["CarSideView_1.imb", "CarSideView_1.imb", "VL80T"].into_iter(), - ); + let counts = + count_named_values(["CarSideView_1.imb", "CarSideView_1.imb", "VL80T"].into_iter()); assert_eq!(counts.get("CarSideView_1.imb"), Some(&2)); assert_eq!(counts.get("VL80T"), Some(&1)); } @@ -868,9 +917,73 @@ mod tests { ); } + #[test] + fn classifies_car_auxiliary_stem_relations() { + let identical = EngineTypeFamilyEntry { + canonical_stem: "gp7".to_string(), + car_file: None, + lco_file: None, + cgo_file: None, + cct_file: None, + primary_display_name: None, + content_name: None, + internal_stem: Some("GP7L".to_string()), + auxiliary_stem: Some("GP7L".to_string()), + side_view_resource: None, + companion_stem: None, + body_type_label: None, + cct_identifier: None, + cct_value: None, + has_matched_locomotive_pair: false, + }; + let stripped = EngineTypeFamilyEntry { + internal_stem: Some("Class01L".to_string()), + auxiliary_stem: Some("Class01".to_string()), + ..identical.clone() + }; + let stripped_casefolded = EngineTypeFamilyEntry { + internal_stem: Some("classqjt".to_string()), + auxiliary_stem: Some("qjclasst".to_string()), + ..identical.clone() + }; + let distinct = EngineTypeFamilyEntry { + internal_stem: Some("ClassA1T".to_string()), + auxiliary_stem: Some("ClassA1L".to_string()), + ..identical + }; + + assert_eq!( + classify_car_auxiliary_stem_relation(&stripped), + Some("matches_internal_without_role_suffix".to_string()) + ); + assert_eq!( + classify_car_auxiliary_stem_relation(&stripped_casefolded), + Some("distinct_auxiliary_stem".to_string()) + ); + assert_eq!( + classify_car_auxiliary_stem_relation(&distinct), + Some("distinct_auxiliary_stem".to_string()) + ); + } + + #[test] + fn builds_cgo_scalar_ladder_counts() { + let ladders = build_cgo_scalar_ladder_counts( + [ + vec!["10.000000".to_string(), "20.000000".to_string()], + vec!["10.000000".to_string(), "20.000000".to_string()], + vec!["55.000000".to_string(), "85.000000".to_string()], + ] + .iter(), + ); + assert_eq!(ladders.get("10.000000 -> 20.000000"), Some(&2)); + assert_eq!(ladders.get("55.000000 -> 85.000000"), Some(&1)); + } + #[test] fn counts_owned_value_strings() { - let counts = count_owned_values(["13".to_string(), "13".to_string(), "4".to_string()].into_iter()); + let counts = + count_owned_values(["13".to_string(), "13".to_string(), "4".to_string()].into_iter()); assert_eq!(counts.get("13"), Some(&2)); assert_eq!(counts.get("4"), Some(&1)); } diff --git a/crates/rrt-runtime/src/inspect/imb.rs b/crates/rrt-runtime/src/inspect/imb.rs index 0ef63e0..30166ac 100644 --- a/crates/rrt-runtime/src/inspect/imb.rs +++ b/crates/rrt-runtime/src/inspect/imb.rs @@ -78,8 +78,7 @@ pub fn inspect_imb_bytes(bytes: &[u8]) -> Result ClassA1L` + - `CramptonT -> CramptonL` + - `WhaleT -> WhaleL` + - `classqjl -> qjclassl` + - `classqjt -> qjclasst` - `.lco` carries one always-present primary stem at `0x04`. - `.lco` only carries meaningful secondary slots when that leading stem slot is padded: - `0x0c`: conditional companion stem such as `VL80T` or `Zephyr` @@ -22,6 +32,13 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed. fixed fields unless the earlier slot is actually zero-padded. - `.cgo` looks structurally narrow right now: the checked 1.05 corpus has `37` files, all exactly `25` bytes long, each carrying one leading scalar lane plus an inline content stem at `0x04`. +- The `.cgo` leading scalar is no longer just a loose raw count. The checked 1.05 corpus now + collapses into five stable ladders: + - `10 -> 20 -> 40 -> 80` across `6` freight-car families + - `20 -> 40 -> 80` for `Tanker` + - `55 -> 85` for `Auto_Carrier` + - `6 -> 13 -> 27 -> 53` for `Passenger` + - `7 -> 13 -> 27 -> 53` for `Mail` - `.cct` remains the least ambiguous sidecar: current shipped files still look like narrow one-row text metadata. @@ -33,6 +50,7 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed. - internal stem - auxiliary stem slot - side-view resource name + - auxiliary-stem relation counts across the shipped corpus - `.lco` - full internal stem - conditional companion stem slot @@ -41,14 +59,15 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed. - `.cgo` - leading scalar lane - content stem + - scalar ladder counts by shared cargo-car family - `.cct` - tokenized identifier/value row ## Remaining Static Questions - `.car` - - what the `0xa2` auxiliary stem really represents across locomotive, tender, and freight-car - families: alias root, image key, or alternate content stem + - what the `0xa2` auxiliary stem really represents in the five remaining distinct cases: + alternate content root, paired tender/loco image root, or a narrower foreign-display alias - whether the trailing side-view resource can be tied cleanly to `.imb` metadata without inventing frontend semantics - `.lco` @@ -57,8 +76,8 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed. - how much of the early numeric lane block can be promoted from raw `u32/f32` views into stable typed semantics without dynamic evidence - `.cgo` - - whether the leading scalar is enough to justify a named typed field, or whether it should stay - a conservative raw scalar until more binary/code correlation exists + - whether the leading scalar ladders are enough to justify a named typed field, or whether they + should stay conservative report-only ladders until more binary/code correlation exists ## Next Static Parser Work