Classify engine type parser families
This commit is contained in:
parent
4942824526
commit
f3c3eb7262
4 changed files with 149 additions and 17 deletions
|
|
@ -146,9 +146,11 @@ pub struct EngineTypesInspectionReport {
|
|||
pub unmatched_cct_file_count: usize,
|
||||
pub car_side_view_resource_counts: BTreeMap<String, usize>,
|
||||
pub car_auxiliary_stem_counts: BTreeMap<String, usize>,
|
||||
pub car_auxiliary_stem_relation_counts: BTreeMap<String, usize>,
|
||||
pub lco_companion_stem_counts: BTreeMap<String, usize>,
|
||||
pub lco_body_type_label_counts: BTreeMap<String, usize>,
|
||||
pub cgo_scalar_value_counts: BTreeMap<String, usize>,
|
||||
pub cgo_scalar_ladder_counts: BTreeMap<String, usize>,
|
||||
pub cgo_scalar_values_by_content_stem: BTreeMap<String, Vec<String>>,
|
||||
pub cct_identifier_counts: BTreeMap<String, usize>,
|
||||
pub cct_value_counts: BTreeMap<String, usize>,
|
||||
|
|
@ -356,6 +358,11 @@ pub fn inspect_engine_types_dir(
|
|||
.iter()
|
||||
.filter_map(|family| family.auxiliary_stem.as_deref()),
|
||||
);
|
||||
let car_auxiliary_stem_relation_counts = count_owned_values(
|
||||
family_entries
|
||||
.iter()
|
||||
.filter_map(classify_car_auxiliary_stem_relation),
|
||||
);
|
||||
let lco_companion_stem_counts = count_named_values(
|
||||
family_entries
|
||||
.iter()
|
||||
|
|
@ -373,6 +380,8 @@ pub fn inspect_engine_types_dir(
|
|||
);
|
||||
let cgo_scalar_values_by_content_stem =
|
||||
build_cgo_scalar_values_by_content_stem(cgo_reports.values());
|
||||
let cgo_scalar_ladder_counts =
|
||||
build_cgo_scalar_ladder_counts(cgo_scalar_values_by_content_stem.values());
|
||||
let cct_identifier_counts = count_named_values(
|
||||
family_entries
|
||||
.iter()
|
||||
|
|
@ -428,9 +437,11 @@ pub fn inspect_engine_types_dir(
|
|||
.count(),
|
||||
car_side_view_resource_counts,
|
||||
car_auxiliary_stem_counts,
|
||||
car_auxiliary_stem_relation_counts,
|
||||
lco_companion_stem_counts,
|
||||
lco_body_type_label_counts,
|
||||
cgo_scalar_value_counts,
|
||||
cgo_scalar_ladder_counts,
|
||||
cgo_scalar_values_by_content_stem,
|
||||
cct_identifier_counts,
|
||||
cct_value_counts,
|
||||
|
|
@ -674,10 +685,34 @@ fn count_owned_values(values: impl Iterator<Item = String>) -> BTreeMap<String,
|
|||
counts
|
||||
}
|
||||
|
||||
fn classify_car_auxiliary_stem_relation(family: &EngineTypeFamilyEntry) -> Option<String> {
|
||||
let auxiliary_stem = family.auxiliary_stem.as_deref()?;
|
||||
let internal_stem = family.internal_stem.as_deref()?;
|
||||
if auxiliary_stem == internal_stem {
|
||||
return Some("matches_internal_stem".to_string());
|
||||
}
|
||||
let internal_without_role_suffix = strip_terminal_role_letter(internal_stem)?;
|
||||
if auxiliary_stem == internal_without_role_suffix {
|
||||
return Some("matches_internal_without_role_suffix".to_string());
|
||||
}
|
||||
if auxiliary_stem.eq_ignore_ascii_case(internal_without_role_suffix) {
|
||||
return Some("matches_internal_without_role_suffix_casefolded".to_string());
|
||||
}
|
||||
Some("distinct_auxiliary_stem".to_string())
|
||||
}
|
||||
|
||||
/// Removes a single trailing role letter (`L`, `T`, `l` or `t`) from `value`.
///
/// Returns the remaining prefix when the final character is one of those letters, and `None`
/// when `value` is empty or ends in any other character. At most one character is removed.
fn strip_terminal_role_letter(value: &str) -> Option<&str> {
    let mut remaining = value.chars();
    match remaining.next_back()? {
        // After popping the last char, the iterator's backing slice is exactly the prefix.
        'L' | 'T' | 'l' | 't' => Some(remaining.as_str()),
        _ => None,
    }
}
|
||||
|
||||
fn build_cgo_scalar_values_by_content_stem<'a>(
|
||||
reports: impl Iterator<Item = &'a EngineTypeCgoInspectionReport>,
|
||||
) -> BTreeMap<String, Vec<String>> {
|
||||
let mut grouped = BTreeMap::<String, Vec<String>>::new();
|
||||
let mut grouped = BTreeMap::<String, Vec<f32>>::new();
|
||||
for report in reports {
|
||||
let Some(content_stem) = report.content_stem.as_ref() else {
|
||||
continue;
|
||||
|
|
@ -688,13 +723,28 @@ fn build_cgo_scalar_values_by_content_stem<'a>(
|
|||
grouped
|
||||
.entry(content_stem.clone())
|
||||
.or_default()
|
||||
.push(format!("{leading_f32:.6}"));
|
||||
}
|
||||
for values in grouped.values_mut() {
|
||||
values.sort();
|
||||
values.dedup();
|
||||
.push(leading_f32);
|
||||
}
|
||||
grouped
|
||||
.into_iter()
|
||||
.map(|(content_stem, mut values)| {
|
||||
values.sort_by(f32::total_cmp);
|
||||
values.dedup();
|
||||
(
|
||||
content_stem,
|
||||
values
|
||||
.into_iter()
|
||||
.map(|value| format!("{value:.6}"))
|
||||
.collect::<Vec<_>>(),
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn build_cgo_scalar_ladder_counts<'a>(
|
||||
ladders: impl Iterator<Item = &'a Vec<String>>,
|
||||
) -> BTreeMap<String, usize> {
|
||||
count_owned_values(ladders.map(|ladder| ladder.join(" -> ")))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -824,9 +874,8 @@ mod tests {
|
|||
|
||||
/// `count_named_values` tallies repeated names and keeps single occurrences at one.
#[test]
fn counts_directory_level_slot_values() {
    // Computed once; the earlier duplicate binding was dead and immediately shadowed.
    let counts =
        count_named_values(["CarSideView_1.imb", "CarSideView_1.imb", "VL80T"].into_iter());
    assert_eq!(counts.get("CarSideView_1.imb"), Some(&2));
    assert_eq!(counts.get("VL80T"), Some(&1));
}
|
||||
|
|
@ -868,9 +917,73 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// Exercises every label `classify_car_auxiliary_stem_relation` can produce.
#[test]
fn classifies_car_auxiliary_stem_relations() {
    // Baseline fixture: both stems present and equal, every other field left empty.
    let identical = EngineTypeFamilyEntry {
        canonical_stem: "gp7".to_string(),
        car_file: None,
        lco_file: None,
        cgo_file: None,
        cct_file: None,
        primary_display_name: None,
        content_name: None,
        internal_stem: Some("GP7L".to_string()),
        auxiliary_stem: Some("GP7L".to_string()),
        side_view_resource: None,
        companion_stem: None,
        body_type_label: None,
        cct_identifier: None,
        cct_value: None,
        has_matched_locomotive_pair: false,
    };
    let stripped = EngineTypeFamilyEntry {
        internal_stem: Some("Class01L".to_string()),
        auxiliary_stem: Some("Class01".to_string()),
        ..identical.clone()
    };
    // Same root as `stripped`, but the auxiliary stem differs in ASCII case only.
    let stripped_casefolded = EngineTypeFamilyEntry {
        internal_stem: Some("Class01L".to_string()),
        auxiliary_stem: Some("class01".to_string()),
        ..identical.clone()
    };
    // Same letters in a different order: neither suffix stripping nor case folding matches.
    let reordered = EngineTypeFamilyEntry {
        internal_stem: Some("classqjt".to_string()),
        auxiliary_stem: Some("qjclasst".to_string()),
        ..identical.clone()
    };
    // Clone rather than move so `identical` itself can still be asserted below.
    let distinct = EngineTypeFamilyEntry {
        internal_stem: Some("ClassA1T".to_string()),
        auxiliary_stem: Some("ClassA1L".to_string()),
        ..identical.clone()
    };

    assert_eq!(
        classify_car_auxiliary_stem_relation(&identical),
        Some("matches_internal_stem".to_string())
    );
    assert_eq!(
        classify_car_auxiliary_stem_relation(&stripped),
        Some("matches_internal_without_role_suffix".to_string())
    );
    assert_eq!(
        classify_car_auxiliary_stem_relation(&stripped_casefolded),
        Some("matches_internal_without_role_suffix_casefolded".to_string())
    );
    assert_eq!(
        classify_car_auxiliary_stem_relation(&reordered),
        Some("distinct_auxiliary_stem".to_string())
    );
    assert_eq!(
        classify_car_auxiliary_stem_relation(&distinct),
        Some("distinct_auxiliary_stem".to_string())
    );
}
|
||||
|
||||
/// Identical ladders collapse into one joined key whose count reflects the repeats.
#[test]
fn builds_cgo_scalar_ladder_counts() {
    let repeated_ladder = vec!["10.000000".to_string(), "20.000000".to_string()];
    let single_ladder = vec!["55.000000".to_string(), "85.000000".to_string()];
    let inputs = [repeated_ladder.clone(), repeated_ladder, single_ladder];

    let ladders = build_cgo_scalar_ladder_counts(inputs.iter());

    assert_eq!(ladders.get("10.000000 -> 20.000000"), Some(&2));
    assert_eq!(ladders.get("55.000000 -> 85.000000"), Some(&1));
}
|
||||
|
||||
/// `count_owned_values` tallies owned strings the same way the borrowed counter does.
#[test]
fn counts_owned_value_strings() {
    // Computed once; the earlier duplicate binding was dead and immediately shadowed.
    let counts =
        count_owned_values(["13".to_string(), "13".to_string(), "4".to_string()].into_iter());
    assert_eq!(counts.get("13"), Some(&2));
    assert_eq!(counts.get("4"), Some(&1));
}
|
||||
|
|
|
|||
|
|
@ -78,8 +78,7 @@ pub fn inspect_imb_bytes(bytes: &[u8]) -> Result<ImbInspectionReport, Box<dyn st
|
|||
let target_screen_width = find_scalar_i64(&entries, "TGATargetScreenWidth");
|
||||
let target_screen_height = find_scalar_i64(&entries, "TGATargetScreenHeight");
|
||||
let scaleable = find_scalar_i64(&entries, "Scaleable").map(|value| value != 0);
|
||||
let max_percent_of_interface_vram =
|
||||
find_scalar_f64(&entries, "MaxPercentOfInterfaceVRAM");
|
||||
let max_percent_of_interface_vram = find_scalar_f64(&entries, "MaxPercentOfInterfaceVRAM");
|
||||
let image_rect = find_i64_quad(&entries, "ImageWH");
|
||||
|
||||
Ok(ImbInspectionReport {
|
||||
|
|
|
|||
|
|
@ -12,7 +12,8 @@ This file is the short active queue for the current runtime and reverse-engineer
|
|||
|
||||
- The active static parser head is now the `engine_types` semantics frontier.
|
||||
The repo now has structural inspectors for `.car`, `.lco`, `.cgo`, and `.cct`, but the binary side is still only partially semantic: the checked 1.05 corpus grounds `.car` fixed strings at `0x0c / 0x48 / 0x84` plus a second fixed stem slot at `0xa2` and a side-view resource name at `0xc0`, while `.lco` carries a stable primary stem at `0x04` and only conditional companion/body slots at `0x0c` and `0x12` when the leading stem slot is padded.
|
||||
The next honest static work is to keep promoting those fixed lanes into stable parser fields and decide how far `.cgo` and the remaining `EngineTypes` sidecars can be grounded without overclaiming semantics.
|
||||
The checked 1.05 corpus now also splits `.car` auxiliary stems into `126` direct matches, `14` role-neutral roots, and only `5` truly distinct cases, while `.cgo` collapses into five stable scalar ladders instead of arbitrary floats.
|
||||
The next honest static work is to keep promoting those fixed lanes into stable parser fields, explain the five remaining distinct auxiliary-stem cases, and decide how far the `.cgo` ladders and guarded `.lco` companion lanes can be grounded without overclaiming semantics.
|
||||
Preserved checked parser detail now lives in [EngineTypes parser semantics](rehost-queue/engine-types-parser-semantics-2026-04-21.md).
|
||||
Preserved checked format inventory detail now lives in [RT3 format inventory](rehost-queue/format-inventory-2026-04-21.md).
|
||||
|
||||
|
|
|
|||
|
|
@ -13,6 +13,16 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed.
|
|||
- `0xc0`: side-view resource name such as `CarSideView_1.imb`
|
||||
- The checked 1.05 corpus (`145` `.car` files) carries all five of those `.car` slots on every
|
||||
file inspected so far.
|
||||
- The checked 1.05 corpus now also grounds the `0xa2` relation split:
|
||||
- `126` files: `auxiliary_stem == internal_stem`
|
||||
- `14` files: `auxiliary_stem` equals `internal_stem` with its trailing role suffix (`L` / `T`) removed
|
||||
- `5` files: truly distinct auxiliary stems
|
||||
- Those five distinct auxiliary-stem cases are narrow and specific:
|
||||
- `ClassA1T -> ClassA1L`
|
||||
- `CramptonT -> CramptonL`
|
||||
- `WhaleT -> WhaleL`
|
||||
- `classqjl -> qjclassl`
|
||||
- `classqjt -> qjclasst`
|
||||
- `.lco` carries one always-present primary stem at `0x04`.
|
||||
- `.lco` only carries meaningful secondary slots when that leading stem slot is padded:
|
||||
- `0x0c`: conditional companion stem such as `VL80T` or `Zephyr`
|
||||
|
|
@ -22,6 +32,13 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed.
|
|||
fixed fields unless the earlier slot is actually zero-padded.
|
||||
- `.cgo` looks structurally narrow right now: the checked 1.05 corpus has `37` files, all exactly
|
||||
`25` bytes long, each carrying one leading scalar lane plus an inline content stem at `0x04`.
|
||||
- The `.cgo` leading scalar is no longer just a loose raw count. The checked 1.05 corpus now
|
||||
collapses into five stable ladders:
|
||||
- `10 -> 20 -> 40 -> 80` across `6` freight-car families
|
||||
- `20 -> 40 -> 80` for `Tanker`
|
||||
- `55 -> 85` for `Auto_Carrier`
|
||||
- `6 -> 13 -> 27 -> 53` for `Passenger`
|
||||
- `7 -> 13 -> 27 -> 53` for `Mail`
|
||||
- `.cct` remains the least ambiguous sidecar: current shipped files still look like narrow one-row
|
||||
text metadata.
|
||||
|
||||
|
|
@ -33,6 +50,7 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed.
|
|||
- internal stem
|
||||
- auxiliary stem slot
|
||||
- side-view resource name
|
||||
- auxiliary-stem relation counts across the shipped corpus
|
||||
- `.lco`
|
||||
- full internal stem
|
||||
- conditional companion stem slot
|
||||
|
|
@ -41,14 +59,15 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed.
|
|||
- `.cgo`
|
||||
- leading scalar lane
|
||||
- content stem
|
||||
- scalar ladder counts by shared cargo-car family
|
||||
- `.cct`
|
||||
- tokenized identifier/value row
|
||||
|
||||
## Remaining Static Questions
|
||||
|
||||
- `.car`
|
||||
- what the `0xa2` auxiliary stem really represents across locomotive, tender, and freight-car
|
||||
families: alias root, image key, or alternate content stem
|
||||
- what the `0xa2` auxiliary stem really represents in the five remaining distinct cases:
|
||||
alternate content root, paired tender/loco image root, or a narrower foreign-display alias
|
||||
- whether the trailing side-view resource can be tied cleanly to `.imb` metadata without
|
||||
inventing frontend semantics
|
||||
- `.lco`
|
||||
|
|
@ -57,8 +76,8 @@ first `.car` / `.lco` / `.cgo` / `.cct` inspector pass landed.
|
|||
- how much of the early numeric lane block can be promoted from raw `u32/f32` views into stable
|
||||
typed semantics without dynamic evidence
|
||||
- `.cgo`
|
||||
- whether the leading scalar is enough to justify a named typed field, or whether it should stay
|
||||
a conservative raw scalar until more binary/code correlation exists
|
||||
- whether the leading scalar ladders are enough to justify a named typed field, or whether they
|
||||
should stay conservative report-only ladders until more binary/code correlation exists
|
||||
|
||||
## Next Static Parser Work
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue