Summarize engine type lane families
This commit is contained in:
parent
f3c3eb7262
commit
2d5f5dec10
3 changed files with 128 additions and 3 deletions
|
|
@ -149,6 +149,7 @@ pub struct EngineTypesInspectionReport {
|
||||||
pub car_auxiliary_stem_relation_counts: BTreeMap<String, usize>,
|
pub car_auxiliary_stem_relation_counts: BTreeMap<String, usize>,
|
||||||
pub lco_companion_stem_counts: BTreeMap<String, usize>,
|
pub lco_companion_stem_counts: BTreeMap<String, usize>,
|
||||||
pub lco_body_type_label_counts: BTreeMap<String, usize>,
|
pub lco_body_type_label_counts: BTreeMap<String, usize>,
|
||||||
|
pub lco_low_cardinality_lane_counts: BTreeMap<String, BTreeMap<String, usize>>,
|
||||||
pub cgo_scalar_value_counts: BTreeMap<String, usize>,
|
pub cgo_scalar_value_counts: BTreeMap<String, usize>,
|
||||||
pub cgo_scalar_ladder_counts: BTreeMap<String, usize>,
|
pub cgo_scalar_ladder_counts: BTreeMap<String, usize>,
|
||||||
pub cgo_scalar_values_by_content_stem: BTreeMap<String, Vec<String>>,
|
pub cgo_scalar_values_by_content_stem: BTreeMap<String, Vec<String>>,
|
||||||
|
|
@ -373,6 +374,8 @@ pub fn inspect_engine_types_dir(
|
||||||
.iter()
|
.iter()
|
||||||
.filter_map(|family| family.body_type_label.as_deref()),
|
.filter_map(|family| family.body_type_label.as_deref()),
|
||||||
);
|
);
|
||||||
|
let lco_low_cardinality_lane_counts =
|
||||||
|
build_lco_low_cardinality_lane_counts(lco_reports.values());
|
||||||
let cgo_scalar_value_counts = count_owned_values(
|
let cgo_scalar_value_counts = count_owned_values(
|
||||||
cgo_reports
|
cgo_reports
|
||||||
.values()
|
.values()
|
||||||
|
|
@ -440,6 +443,7 @@ pub fn inspect_engine_types_dir(
|
||||||
car_auxiliary_stem_relation_counts,
|
car_auxiliary_stem_relation_counts,
|
||||||
lco_companion_stem_counts,
|
lco_companion_stem_counts,
|
||||||
lco_body_type_label_counts,
|
lco_body_type_label_counts,
|
||||||
|
lco_low_cardinality_lane_counts,
|
||||||
cgo_scalar_value_counts,
|
cgo_scalar_value_counts,
|
||||||
cgo_scalar_ladder_counts,
|
cgo_scalar_ladder_counts,
|
||||||
cgo_scalar_values_by_content_stem,
|
cgo_scalar_values_by_content_stem,
|
||||||
|
|
@ -747,6 +751,35 @@ fn build_cgo_scalar_ladder_counts<'a>(
|
||||||
count_owned_values(ladders.map(|ladder| ladder.join(" -> ")))
|
count_owned_values(ladders.map(|ladder| ladder.join(" -> ")))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn build_lco_low_cardinality_lane_counts<'a>(
|
||||||
|
reports: impl Iterator<Item = &'a EngineTypeLcoInspectionReport>,
|
||||||
|
) -> BTreeMap<String, BTreeMap<String, usize>> {
|
||||||
|
let mut per_offset = BTreeMap::<usize, BTreeMap<u32, usize>>::new();
|
||||||
|
for report in reports {
|
||||||
|
for lane in &report.early_lanes {
|
||||||
|
*per_offset
|
||||||
|
.entry(lane.offset)
|
||||||
|
.or_default()
|
||||||
|
.entry(lane.raw_u32)
|
||||||
|
.or_insert(0) += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
per_offset
|
||||||
|
.into_iter()
|
||||||
|
.filter(|(_, counts)| counts.len() <= 10)
|
||||||
|
.map(|(offset, counts)| {
|
||||||
|
(
|
||||||
|
format!("0x{offset:04x}"),
|
||||||
|
counts
|
||||||
|
.into_iter()
|
||||||
|
.map(|(raw_u32, count)| (format!("0x{raw_u32:08x}"), count))
|
||||||
|
.collect::<BTreeMap<_, _>>(),
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
@ -980,6 +1013,84 @@ mod tests {
|
||||||
assert_eq!(ladders.get("55.000000 -> 85.000000"), Some(&1));
|
assert_eq!(ladders.get("55.000000 -> 85.000000"), Some(&1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Checks that lane values are tallied per offset across reports and that an
/// offset with more than ten distinct raw values is left out of the summary.
#[test]
fn builds_lco_low_cardinality_lane_counts() {
    // Builds one raw lane whose hex strings mirror its numeric fields.
    fn lane(offset: usize, raw_u32: u32) -> EngineTypeRawLane {
        EngineTypeRawLane {
            offset,
            offset_hex: format!("0x{offset:04x}"),
            raw_u32,
            raw_u32_hex: format!("0x{raw_u32:08x}"),
            raw_f32: 0.0,
        }
    }

    // Builds an otherwise-empty `.lco` report carrying only the given lanes.
    fn report(early_lanes: Vec<EngineTypeRawLane>) -> EngineTypeLcoInspectionReport {
        EngineTypeLcoInspectionReport {
            file_size: 0,
            header_magic: None,
            header_magic_hex: None,
            internal_stem: None,
            companion_stem: None,
            body_type_label: None,
            early_lanes,
            notes: Vec::new(),
        }
    }

    let reports = [
        report(vec![lane(0x20, 0), lane(0x24, 1)]),
        report(vec![lane(0x20, 1), lane(0x24, 2)]),
        // Eleven distinct values at one offset: one past the low-cardinality cutoff.
        report((0..11).map(|raw_u32| lane(0x28, raw_u32)).collect()),
    ];
    let lane_counts = build_lco_low_cardinality_lane_counts(reports.iter());

    let count_at = |offset: &str, raw: &str| {
        lane_counts
            .get(offset)
            .and_then(|counts| counts.get(raw))
            .copied()
    };

    // Each low-cardinality offset keeps one entry per distinct raw value.
    assert_eq!(count_at("0x0020", "0x00000000"), Some(1));
    assert_eq!(count_at("0x0020", "0x00000001"), Some(1));
    assert_eq!(count_at("0x0024", "0x00000001"), Some(1));
    assert_eq!(count_at("0x0024", "0x00000002"), Some(1));

    // The high-variance offset must be filtered out, leaving only the two above.
    assert!(!lane_counts.contains_key("0x0028"));
    assert_eq!(lane_counts.len(), 2);
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn counts_owned_value_strings() {
|
fn counts_owned_value_strings() {
|
||||||
let counts =
|
let counts =
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,8 @@ This file is the short active queue for the current runtime and reverse-engineer
|
||||||
- The active static parser head is now the `engine_types` semantics frontier.
|
- The active static parser head is now the `engine_types` semantics frontier.
|
||||||
The repo now has structural inspectors for `.car`, `.lco`, `.cgo`, and `.cct`, but the binary side is still only partially semantic: the checked 1.05 corpus grounds `.car` fixed strings at `0x0c / 0x48 / 0x84` plus a second fixed stem slot at `0xa2` and a side-view resource name at `0xc0`, while `.lco` carries a stable primary stem at `0x04` and only conditional companion/body slots at `0x0c` and `0x12` when the leading stem slot is padded.
|
The repo now has structural inspectors for `.car`, `.lco`, `.cgo`, and `.cct`, but the binary side is still only partially semantic: the checked 1.05 corpus grounds `.car` fixed strings at `0x0c / 0x48 / 0x84` plus a second fixed stem slot at `0xa2` and a side-view resource name at `0xc0`, while `.lco` carries a stable primary stem at `0x04` and only conditional companion/body slots at `0x0c` and `0x12` when the leading stem slot is padded.
|
||||||
The checked 1.05 corpus now also splits `.car` auxiliary stems into `126` direct matches, `14` role-neutral roots, and only `5` truly distinct cases, while `.cgo` collapses into five stable scalar ladders instead of arbitrary floats.
|
The checked 1.05 corpus now also splits `.car` auxiliary stems into `126` direct matches, `14` role-neutral roots, and only `5` truly distinct cases, while `.cgo` collapses into five stable scalar ladders instead of arbitrary floats.
|
||||||
The next honest static work is to keep promoting those fixed lanes into stable parser fields, explain the five remaining distinct auxiliary-stem cases, and decide how far the `.cgo` ladders and guarded `.lco` companion lanes can be grounded without overclaiming semantics.
|
The early `.lco` lane block is now partially partitioned too: only offsets `0x20`, `0x34`, `0x38`, `0x3c`, `0x44`, `0x48`, and `0x54` behave like low-cardinality buckets, while the other early lanes still look high-variance.
|
||||||
|
The next honest static work is to keep promoting those fixed lanes into stable parser fields, explain the five remaining distinct auxiliary-stem cases, and decide how far the `.cgo` ladders plus the low-cardinality `.lco` lanes can be grounded without overclaiming semantics.
|
||||||
Preserved checked parser detail now lives in [EngineTypes parser semantics](rehost-queue/engine-types-parser-semantics-2026-04-21.md).
|
Preserved checked parser detail now lives in [EngineTypes parser semantics](rehost-queue/engine-types-parser-semantics-2026-04-21.md).
|
||||||
Preserved checked format inventory detail now lives in [RT3 format inventory](rehost-queue/format-inventory-2026-04-21.md).
|
Preserved checked format inventory detail now lives in [RT3 format inventory](rehost-queue/format-inventory-2026-04-21.md).
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,16 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed.
|
||||||
- The checked 1.05 corpus (`66` `.lco` files) shows why the guard matters: long primary stems
|
- The checked 1.05 corpus (`66` `.lco` files) shows why the guard matters: long primary stems
|
||||||
such as `AtlanticL` naturally spill across `0x0c`, so `0x0c` and `0x12` are not independent
|
such as `AtlanticL` naturally spill across `0x0c`, so `0x0c` and `0x12` are not independent
|
||||||
fixed fields unless the earlier slot is actually zero-padded.
|
fixed fields unless the earlier slot is actually zero-padded.
|
||||||
|
- The checked 1.05 corpus now also narrows the early `.lco` lane block into two categories:
|
||||||
|
- low-cardinality lanes at `0x20`, `0x34`, `0x38`, `0x3c`, `0x44`, `0x48`, and `0x54`
|
||||||
|
- high-variance lanes at `0x24`, `0x28`, `0x2c`, `0x30`, `0x40`, `0x4c`, and `0x50`
|
||||||
|
- The low-cardinality side is now preserved directly in the inspector:
|
||||||
|
- `0x20`: only `0x00000000` or `0x0000cdcd`
|
||||||
|
- `0x34`: only `0x00000000 / 0x01000000 / 0x02000000 / 0x03000000`
|
||||||
|
- `0x38`: only `0x02000000 .. 0x0a000000`
|
||||||
|
- `0x3c` and `0x44`: always `0x00000000`
|
||||||
|
- `0x48`: only ten values in the `0x003f8000 .. 0x00412000` range
|
||||||
|
- `0x54`: only `0x00000000 .. 0x09000000`
|
||||||
- `.cgo` looks structurally narrow right now: the checked 1.05 corpus has `37` files, all exactly
|
- `.cgo` looks structurally narrow right now: the checked 1.05 corpus has `37` files, all exactly
|
||||||
`25` bytes long, each carrying one leading scalar lane plus an inline content stem at `0x04`.
|
`25` bytes long, each carrying one leading scalar lane plus an inline content stem at `0x04`.
|
||||||
- The `.cgo` leading scalar is no longer just a loose raw count. The checked 1.05 corpus now
|
- The `.cgo` leading scalar is no longer just a loose raw count. The checked 1.05 corpus now
|
||||||
|
|
@ -56,6 +66,7 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed.
|
||||||
- conditional companion stem slot
|
- conditional companion stem slot
|
||||||
- conditional body-type label
|
- conditional body-type label
|
||||||
- early raw numeric lane block `0x20..0x54`
|
- early raw numeric lane block `0x20..0x54`
|
||||||
|
- low-cardinality lane counts for the enumerated subset of that block
|
||||||
- `.cgo`
|
- `.cgo`
|
||||||
- leading scalar lane
|
- leading scalar lane
|
||||||
- content stem
|
- content stem
|
||||||
|
|
@ -73,8 +84,10 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed.
|
||||||
- `.lco`
|
- `.lco`
|
||||||
- whether the guarded companion-stem slot is a tender/fallback display family, a foreign reuse
|
- whether the guarded companion-stem slot is a tender/fallback display family, a foreign reuse
|
||||||
key, or only a subset authoring convenience
|
key, or only a subset authoring convenience
|
||||||
- how much of the early numeric lane block can be promoted from raw `u32/f32` views into stable
|
- whether the newly isolated low-cardinality lane subset can be promoted from raw `u32` buckets
|
||||||
typed semantics without dynamic evidence
|
into named enums without overclaiming semantics
|
||||||
|
- how much of the remaining high-variance lane subset can be promoted from raw `u32/f32` views
|
||||||
|
into stable typed semantics without dynamic evidence
|
||||||
- `.cgo`
|
- `.cgo`
|
||||||
- whether the leading scalar ladders are enough to justify a named typed field, or whether they
|
- whether the leading scalar ladders are enough to justify a named typed field, or whether they
|
||||||
should stay conservative report-only ladders until more binary/code correlation exists
|
should stay conservative report-only ladders until more binary/code correlation exists
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue