Summarize engine type lane families

This commit is contained in:
Jan Petykiewicz 2026-04-21 23:07:34 -07:00
commit 2d5f5dec10
3 changed files with 128 additions and 3 deletions

View file

@ -149,6 +149,7 @@ pub struct EngineTypesInspectionReport {
pub car_auxiliary_stem_relation_counts: BTreeMap<String, usize>, pub car_auxiliary_stem_relation_counts: BTreeMap<String, usize>,
pub lco_companion_stem_counts: BTreeMap<String, usize>, pub lco_companion_stem_counts: BTreeMap<String, usize>,
pub lco_body_type_label_counts: BTreeMap<String, usize>, pub lco_body_type_label_counts: BTreeMap<String, usize>,
pub lco_low_cardinality_lane_counts: BTreeMap<String, BTreeMap<String, usize>>,
pub cgo_scalar_value_counts: BTreeMap<String, usize>, pub cgo_scalar_value_counts: BTreeMap<String, usize>,
pub cgo_scalar_ladder_counts: BTreeMap<String, usize>, pub cgo_scalar_ladder_counts: BTreeMap<String, usize>,
pub cgo_scalar_values_by_content_stem: BTreeMap<String, Vec<String>>, pub cgo_scalar_values_by_content_stem: BTreeMap<String, Vec<String>>,
@ -373,6 +374,8 @@ pub fn inspect_engine_types_dir(
.iter() .iter()
.filter_map(|family| family.body_type_label.as_deref()), .filter_map(|family| family.body_type_label.as_deref()),
); );
let lco_low_cardinality_lane_counts =
build_lco_low_cardinality_lane_counts(lco_reports.values());
let cgo_scalar_value_counts = count_owned_values( let cgo_scalar_value_counts = count_owned_values(
cgo_reports cgo_reports
.values() .values()
@ -440,6 +443,7 @@ pub fn inspect_engine_types_dir(
car_auxiliary_stem_relation_counts, car_auxiliary_stem_relation_counts,
lco_companion_stem_counts, lco_companion_stem_counts,
lco_body_type_label_counts, lco_body_type_label_counts,
lco_low_cardinality_lane_counts,
cgo_scalar_value_counts, cgo_scalar_value_counts,
cgo_scalar_ladder_counts, cgo_scalar_ladder_counts,
cgo_scalar_values_by_content_stem, cgo_scalar_values_by_content_stem,
@ -747,6 +751,35 @@ fn build_cgo_scalar_ladder_counts<'a>(
count_owned_values(ladders.map(|ladder| ladder.join(" -> "))) count_owned_values(ladders.map(|ladder| ladder.join(" -> ")))
} }
/// Tallies the raw `u32` values seen at each early-lane offset across all
/// supplied `.lco` reports and keeps only the offsets with few distinct values.
///
/// Returns a map keyed by the offset rendered as `0x%04x`; each entry maps a
/// raw value rendered as `0x%08x` to the number of lanes that carried it.
/// Offsets showing more than ten distinct values are left out entirely.
fn build_lco_low_cardinality_lane_counts<'a>(
    reports: impl Iterator<Item = &'a EngineTypeLcoInspectionReport>,
) -> BTreeMap<String, BTreeMap<String, usize>> {
    // An offset is reported only when it shows at most this many distinct values.
    const MAX_DISTINCT_VALUES: usize = 10;

    // First pass: numeric tally, offset -> raw value -> occurrences.
    let mut tallies: BTreeMap<usize, BTreeMap<u32, usize>> = BTreeMap::new();
    for lane in reports.flat_map(|report| &report.early_lanes) {
        let occurrences = tallies
            .entry(lane.offset)
            .or_default()
            .entry(lane.raw_u32)
            .or_default();
        *occurrences += 1;
    }

    // Second pass: drop the high-variance offsets and render the rest as hex.
    let mut summary = BTreeMap::new();
    for (offset, value_counts) in tallies {
        if value_counts.len() > MAX_DISTINCT_VALUES {
            continue;
        }
        let rendered = value_counts
            .into_iter()
            .map(|(raw, occurrences)| (format!("0x{raw:08x}"), occurrences))
            .collect::<BTreeMap<_, _>>();
        summary.insert(format!("0x{offset:04x}"), rendered);
    }
    summary
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@ -980,6 +1013,84 @@ mod tests {
assert_eq!(ladders.get("55.000000 -> 85.000000"), Some(&1)); assert_eq!(ladders.get("55.000000 -> 85.000000"), Some(&1));
} }
/// Checks the per-offset tallies, the aggregation of repeated values, and the
/// distinct-value cutoff that separates low-cardinality lanes from the rest.
#[test]
fn builds_lco_low_cardinality_lane_counts() {
    // Fixture: one raw lane whose hex strings mirror its numeric fields.
    fn lane(offset: usize, raw_u32: u32) -> EngineTypeRawLane {
        EngineTypeRawLane {
            offset,
            offset_hex: format!("0x{offset:04x}"),
            raw_u32,
            raw_u32_hex: format!("0x{raw_u32:08x}"),
            raw_f32: 0.0,
        }
    }

    // Fixture: an otherwise empty report carrying the given early lanes.
    fn report(early_lanes: Vec<EngineTypeRawLane>) -> EngineTypeLcoInspectionReport {
        EngineTypeLcoInspectionReport {
            file_size: 0,
            header_magic: None,
            header_magic_hex: None,
            internal_stem: None,
            companion_stem: None,
            body_type_label: None,
            early_lanes,
            notes: Vec::new(),
        }
    }

    let mut reports = vec![
        report(vec![lane(0x20, 0), lane(0x24, 1)]),
        report(vec![lane(0x20, 1), lane(0x24, 2)]),
    ];
    // Eleven further reports:
    // - 0x28 sees eleven distinct values, so it must be filtered out
    // - 0x3c always carries zero, so its single bucket must count eleven
    // - 0x48 sees exactly ten distinct values (0 twice), the largest kept set
    reports.extend((0..11u32).map(|value| {
        report(vec![
            lane(0x28, value),
            lane(0x3c, 0),
            lane(0x48, value % 10),
        ])
    }));

    let lane_counts = build_lco_low_cardinality_lane_counts(reports.iter());
    let count_at = |offset: &str, value: &str| {
        lane_counts
            .get(offset)
            .and_then(|counts| counts.get(value))
            .copied()
    };

    // Every tallied (offset, value) pair of the two hand-written reports.
    assert_eq!(count_at("0x0020", "0x00000000"), Some(1));
    assert_eq!(count_at("0x0020", "0x00000001"), Some(1));
    assert_eq!(count_at("0x0024", "0x00000001"), Some(1));
    assert_eq!(count_at("0x0024", "0x00000002"), Some(1));

    // Repeated values are summed rather than overwritten.
    assert_eq!(count_at("0x003c", "0x00000000"), Some(11));

    // Exactly ten distinct values is still low-cardinality.
    assert_eq!(lane_counts.get("0x0048").map(BTreeMap::len), Some(10));
    assert_eq!(count_at("0x0048", "0x00000000"), Some(2));

    // Eleven distinct values is not; the whole offset disappears.
    assert!(!lane_counts.contains_key("0x0028"));
    assert_eq!(lane_counts.len(), 4);
}
#[test] #[test]
fn counts_owned_value_strings() { fn counts_owned_value_strings() {
let counts = let counts =

View file

@ -13,7 +13,8 @@ This file is the short active queue for the current runtime and reverse-engineer
- The active static parser head is now the `engine_types` semantics frontier. - The active static parser head is now the `engine_types` semantics frontier.
The repo now has structural inspectors for `.car`, `.lco`, `.cgo`, and `.cct`, but the binary side is still only partially semantic: the checked 1.05 corpus grounds `.car` fixed strings at `0x0c / 0x48 / 0x84` plus a second fixed stem slot at `0xa2` and a side-view resource name at `0xc0`, while `.lco` carries a stable primary stem at `0x04` and only conditional companion/body slots at `0x0c` and `0x12` when the leading stem slot is padded. The repo now has structural inspectors for `.car`, `.lco`, `.cgo`, and `.cct`, but the binary side is still only partially semantic: the checked 1.05 corpus grounds `.car` fixed strings at `0x0c / 0x48 / 0x84` plus a second fixed stem slot at `0xa2` and a side-view resource name at `0xc0`, while `.lco` carries a stable primary stem at `0x04` and only conditional companion/body slots at `0x0c` and `0x12` when the leading stem slot is padded.
The checked 1.05 corpus now also splits `.car` auxiliary stems into `126` direct matches, `14` role-neutral roots, and only `5` truly distinct cases, while `.cgo` collapses into five stable scalar ladders instead of arbitrary floats. The checked 1.05 corpus now also splits `.car` auxiliary stems into `126` direct matches, `14` role-neutral roots, and only `5` truly distinct cases, while `.cgo` collapses into five stable scalar ladders instead of arbitrary floats.
The next honest static work is to keep promoting those fixed lanes into stable parser fields, explain the five remaining distinct auxiliary-stem cases, and decide how far the `.cgo` ladders and guarded `.lco` companion lanes can be grounded without overclaiming semantics. The early `.lco` lane block is now partially partitioned too: only offsets `0x20`, `0x34`, `0x38`, `0x3c`, `0x44`, `0x48`, and `0x54` behave like low-cardinality buckets, while the other early lanes still look high-variance.
The next honest static work is to keep promoting those fixed lanes into stable parser fields, explain the five remaining distinct auxiliary-stem cases, and decide how far the `.cgo` ladders plus the low-cardinality `.lco` lanes can be grounded without overclaiming semantics.
Preserved checked parser detail now lives in [EngineTypes parser semantics](rehost-queue/engine-types-parser-semantics-2026-04-21.md). Preserved checked parser detail now lives in [EngineTypes parser semantics](rehost-queue/engine-types-parser-semantics-2026-04-21.md).
Preserved checked format inventory detail now lives in [RT3 format inventory](rehost-queue/format-inventory-2026-04-21.md). Preserved checked format inventory detail now lives in [RT3 format inventory](rehost-queue/format-inventory-2026-04-21.md).

View file

@ -30,6 +30,16 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed.
- The checked 1.05 corpus (`66` `.lco` files) shows why the guard matters: long primary stems - The checked 1.05 corpus (`66` `.lco` files) shows why the guard matters: long primary stems
such as `AtlanticL` naturally spill across `0x0c`, so `0x0c` and `0x12` are not independent such as `AtlanticL` naturally spill across `0x0c`, so `0x0c` and `0x12` are not independent
fixed fields unless the earlier slot is actually zero-padded. fixed fields unless the earlier slot is actually zero-padded.
- The checked 1.05 corpus now also narrows the early `.lco` lane block into two categories:
- low-cardinality lanes at `0x20`, `0x34`, `0x38`, `0x3c`, `0x44`, `0x48`, and `0x54`
- high-variance lanes at `0x24`, `0x28`, `0x2c`, `0x30`, `0x40`, `0x4c`, and `0x50`
- The low-cardinality side is now preserved directly in the inspector:
- `0x20`: only `0x00000000` or `0x0000cdcd`
- `0x34`: only `0x00000000 / 0x01000000 / 0x02000000 / 0x03000000`
- `0x38`: only `0x02000000 .. 0x0a000000`
- `0x3c` and `0x44`: always `0x00000000`
- `0x48`: only ten values in the `0x003f8000 .. 0x00412000` range
- `0x54`: only `0x00000000 .. 0x09000000`
- `.cgo` looks structurally narrow right now: the checked 1.05 corpus has `37` files, all exactly - `.cgo` looks structurally narrow right now: the checked 1.05 corpus has `37` files, all exactly
`25` bytes long, each carrying one leading scalar lane plus an inline content stem at `0x04`. `25` bytes long, each carrying one leading scalar lane plus an inline content stem at `0x04`.
- The `.cgo` leading scalar is no longer just a loose raw count. The checked 1.05 corpus now - The `.cgo` leading scalar is no longer just a loose raw count. The checked 1.05 corpus now
@ -56,6 +66,7 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed.
- conditional companion stem slot - conditional companion stem slot
- conditional body-type label - conditional body-type label
- early raw numeric lane block `0x20..0x54` - early raw numeric lane block `0x20..0x54`
- low-cardinality lane counts for the enumerated subset of that block
- `.cgo` - `.cgo`
- leading scalar lane - leading scalar lane
- content stem - content stem
@ -73,8 +84,10 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed.
- `.lco` - `.lco`
- whether the guarded companion-stem slot is a tender/fallback display family, a foreign reuse - whether the guarded companion-stem slot is a tender/fallback display family, a foreign reuse
key, or only a subset authoring convenience key, or only a subset authoring convenience
- how much of the early numeric lane block can be promoted from raw `u32/f32` views into stable - whether the newly isolated low-cardinality lane subset can be promoted from raw `u32` buckets
typed semantics without dynamic evidence into named enums without overclaiming semantics
- how much of the remaining high-variance lane subset can be promoted from raw `u32/f32` views
into stable typed semantics without dynamic evidence
- `.cgo` - `.cgo`
- whether the leading scalar ladders are enough to justify a named typed field, or whether they - whether the leading scalar ladders are enough to justify a named typed field, or whether they
should stay conservative report-only ladders until more binary/code correlation exists should stay conservative report-only ladders until more binary/code correlation exists