From 2d5f5dec10c760bfccb08142b7c5b41e6c777373 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Tue, 21 Apr 2026 23:07:34 -0700 Subject: [PATCH] Summarize engine type lane families --- .../rrt-runtime/src/inspect/engine_types.rs | 111 ++++++++++++++++++ docs/rehost-queue.md | 3 +- ...ngine-types-parser-semantics-2026-04-21.md | 17 ++- 3 files changed, 128 insertions(+), 3 deletions(-) diff --git a/crates/rrt-runtime/src/inspect/engine_types.rs b/crates/rrt-runtime/src/inspect/engine_types.rs index 1b74a93..00d4226 100644 --- a/crates/rrt-runtime/src/inspect/engine_types.rs +++ b/crates/rrt-runtime/src/inspect/engine_types.rs @@ -149,6 +149,7 @@ pub struct EngineTypesInspectionReport { pub car_auxiliary_stem_relation_counts: BTreeMap, pub lco_companion_stem_counts: BTreeMap, pub lco_body_type_label_counts: BTreeMap, + pub lco_low_cardinality_lane_counts: BTreeMap>, pub cgo_scalar_value_counts: BTreeMap, pub cgo_scalar_ladder_counts: BTreeMap, pub cgo_scalar_values_by_content_stem: BTreeMap>, @@ -373,6 +374,8 @@ pub fn inspect_engine_types_dir( .iter() .filter_map(|family| family.body_type_label.as_deref()), ); + let lco_low_cardinality_lane_counts = + build_lco_low_cardinality_lane_counts(lco_reports.values()); let cgo_scalar_value_counts = count_owned_values( cgo_reports .values() @@ -440,6 +443,7 @@ pub fn inspect_engine_types_dir( car_auxiliary_stem_relation_counts, lco_companion_stem_counts, lco_body_type_label_counts, + lco_low_cardinality_lane_counts, cgo_scalar_value_counts, cgo_scalar_ladder_counts, cgo_scalar_values_by_content_stem, @@ -747,6 +751,35 @@ fn build_cgo_scalar_ladder_counts<'a>( count_owned_values(ladders.map(|ladder| ladder.join(" -> "))) } +fn build_lco_low_cardinality_lane_counts<'a>( + reports: impl Iterator, +) -> BTreeMap> { + let mut per_offset = BTreeMap::>::new(); + for report in reports { + for lane in &report.early_lanes { + *per_offset + .entry(lane.offset) + .or_default() + .entry(lane.raw_u32) + .or_insert(0) += 1; + } + } + + per_offset + .into_iter() + .filter(|(_, counts)| counts.len() <= 10) + .map(|(offset, counts)| { + ( + format!("0x{offset:04x}"), + counts + .into_iter() + .map(|(raw_u32, count)| (format!("0x{raw_u32:08x}"), count)) + .collect::>(), + ) + }) + .collect() +} + #[cfg(test)] mod tests { use super::*; @@ -980,6 +1013,84 @@ mod tests { assert_eq!(ladders.get("55.000000 -> 85.000000"), Some(&1)); } + #[test] + fn builds_lco_low_cardinality_lane_counts() { + let lane_counts = build_lco_low_cardinality_lane_counts( + [ + EngineTypeLcoInspectionReport { + file_size: 0, + header_magic: None, + header_magic_hex: None, + internal_stem: None, + companion_stem: None, + body_type_label: None, + early_lanes: vec![ + EngineTypeRawLane { + offset: 0x20, + offset_hex: "0x0020".to_string(), + raw_u32: 0, + raw_u32_hex: "0x00000000".to_string(), + raw_f32: 0.0, + }, + EngineTypeRawLane { + offset: 0x24, + offset_hex: "0x0024".to_string(), + raw_u32: 1, + raw_u32_hex: "0x00000001".to_string(), + raw_f32: 0.0, + }, + ], + notes: Vec::new(), + }, + EngineTypeLcoInspectionReport { + file_size: 0, + header_magic: None, + header_magic_hex: None, + internal_stem: None, + companion_stem: None, + body_type_label: None, + early_lanes: vec![ + EngineTypeRawLane { + offset: 0x20, + offset_hex: "0x0020".to_string(), + raw_u32: 1, + raw_u32_hex: "0x00000001".to_string(), + raw_f32: 0.0, + }, + EngineTypeRawLane { + offset: 0x24, + offset_hex: "0x0024".to_string(), + raw_u32: 2, + raw_u32_hex: "0x00000002".to_string(), + raw_f32: 0.0, + }, + ], + notes: Vec::new(), + }, + ] + .iter(), + ); + + assert_eq!( + lane_counts + .get("0x0020") + .and_then(|counts| counts.get("0x00000000")), + Some(&1) + ); + assert_eq!( + lane_counts + .get("0x0020") + .and_then(|counts| counts.get("0x00000001")), + Some(&1) + ); + assert_eq!( + lane_counts + .get("0x0024") + .and_then(|counts| counts.get("0x00000002")), + Some(&1) + ); + } + #[test] fn counts_owned_value_strings() { let counts = diff --git a/docs/rehost-queue.md b/docs/rehost-queue.md index 2ad8908..8b03d60 100644 --- a/docs/rehost-queue.md +++ b/docs/rehost-queue.md @@ -13,7 +13,8 @@ This file is the short active queue for the current runtime and reverse-engineer - The active static parser head is now the `engine_types` semantics frontier. The repo now has structural inspectors for `.car`, `.lco`, `.cgo`, and `.cct`, but the binary side is still only partially semantic: the checked 1.05 corpus grounds `.car` fixed strings at `0x0c / 0x48 / 0x84` plus a second fixed stem slot at `0xa2` and a side-view resource name at `0xc0`, while `.lco` carries a stable primary stem at `0x04` and only conditional companion/body slots at `0x0c` and `0x12` when the leading stem slot is padded. The checked 1.05 corpus now also splits `.car` auxiliary stems into `126` direct matches, `14` role-neutral roots, and only `5` truly distinct cases, while `.cgo` collapses into five stable scalar ladders instead of arbitrary floats. - The next honest static work is to keep promoting those fixed lanes into stable parser fields, explain the five remaining distinct auxiliary-stem cases, and decide how far the `.cgo` ladders and guarded `.lco` companion lanes can be grounded without overclaiming semantics. + The early `.lco` lane block is now partially partitioned too: only offsets `0x20`, `0x34`, `0x38`, `0x3c`, `0x44`, `0x48`, and `0x54` behave like low-cardinality buckets, while the other early lanes still look high-variance. + The next honest static work is to keep promoting those fixed lanes into stable parser fields, explain the five remaining distinct auxiliary-stem cases, and decide how far the `.cgo` ladders plus the low-cardinality `.lco` lanes can be grounded without overclaiming semantics. Preserved checked parser detail now lives in [EngineTypes parser semantics](rehost-queue/engine-types-parser-semantics-2026-04-21.md). Preserved checked format inventory detail now lives in [RT3 format inventory](rehost-queue/format-inventory-2026-04-21.md). diff --git a/docs/rehost-queue/engine-types-parser-semantics-2026-04-21.md b/docs/rehost-queue/engine-types-parser-semantics-2026-04-21.md index 11a0a68..4655b2e 100644 --- a/docs/rehost-queue/engine-types-parser-semantics-2026-04-21.md +++ b/docs/rehost-queue/engine-types-parser-semantics-2026-04-21.md @@ -30,6 +30,16 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed. - The checked 1.05 corpus (`66` `.lco` files) shows why the guard matters: long primary stems such as `AtlanticL` naturally spill across `0x0c`, so `0x0c` and `0x12` are not independent fixed fields unless the earlier slot is actually zero-padded. +- The checked 1.05 corpus now also narrows the early `.lco` lane block into two categories: + - low-cardinality lanes at `0x20`, `0x34`, `0x38`, `0x3c`, `0x44`, `0x48`, and `0x54` + - high-variance lanes at `0x24`, `0x28`, `0x2c`, `0x30`, `0x40`, `0x4c`, and `0x50` +- The low-cardinality side is now preserved directly in the inspector: + - `0x20`: only `0x00000000` or `0x0000cdcd` + - `0x34`: only `0x00000000 / 0x01000000 / 0x02000000 / 0x03000000` + - `0x38`: only `0x02000000 .. 0x0a000000` + - `0x3c` and `0x44`: always `0x00000000` + - `0x48`: only ten values in the `0x003f8000 .. 0x00412000` range + - `0x54`: only `0x00000000 .. 0x09000000` - `.cgo` looks structurally narrow right now: the checked 1.05 corpus has `37` files, all exactly `25` bytes long, each carrying one leading scalar lane plus an inline content stem at `0x04`. - The `.cgo` leading scalar is no longer just a loose raw count. The checked 1.05 corpus now @@ -56,6 +66,7 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed. - conditional companion stem slot - conditional body-type label - early raw numeric lane block `0x20..0x54` + - low-cardinality lane counts for the enumerated subset of that block - `.cgo` - leading scalar lane - content stem @@ -73,8 +84,10 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed. - `.lco` - whether the guarded companion-stem slot is a tender/fallback display family, a foreign reuse key, or only a subset authoring convenience - - how much of the early numeric lane block can be promoted from raw `u32/f32` views into stable - typed semantics without dynamic evidence + - whether the newly isolated low-cardinality lane subset can be promoted from raw `u32` buckets + into named enums without overclaiming semantics + - how much of the remaining high-variance lane subset can be promoted from raw `u32/f32` views + into stable typed semantics without dynamic evidence - `.cgo` - whether the leading scalar ladders are enough to justify a named typed field, or whether they should stay conservative report-only ladders until more binary/code correlation exists