Deepen engine type parser semantics

This commit is contained in:
Jan Petykiewicz 2026-04-21 22:44:51 -07:00
commit 1bd4158c0c
7 changed files with 297 additions and 33 deletions

View file

@ -7,7 +7,15 @@ use serde::{Deserialize, Serialize};
const CAR_PRIMARY_DISPLAY_NAME_OFFSET: usize = 0x0c;
const CAR_CONTENT_NAME_OFFSET: usize = 0x48;
const CAR_INTERNAL_STEM_OFFSET: usize = 0x84;
const CAR_AUXILIARY_STEM_OFFSET: usize = 0xa2;
const CAR_AUXILIARY_STEM_LEN: usize = 0x1e;
const CAR_SIDE_VIEW_RESOURCE_OFFSET: usize = 0xc0;
const CAR_SIDE_VIEW_RESOURCE_LEN: usize = 0x20;
const LCO_INTERNAL_STEM_OFFSET: usize = 0x04;
const LCO_COMPANION_STEM_OFFSET: usize = 0x0c;
const LCO_COMPANION_STEM_LEN: usize = 0x06;
const LCO_BODY_TYPE_LABEL_OFFSET: usize = 0x12;
const LCO_BODY_TYPE_LABEL_LEN: usize = 0x06;
const UNMATCHED_LOCOMOTIVE_DISPLAY_NAMES: [&str; 5] =
["242 A1", "Class 460", "Class A1", "Class P8", "Class QJ"];
const LCO_EARLY_LANE_OFFSETS: [usize; 14] = [
@ -24,6 +32,8 @@ pub struct EngineTypeCarInspectionReport {
pub primary_display_name: Option<String>,
pub content_name: Option<String>,
pub internal_stem: Option<String>,
pub auxiliary_stem: Option<String>,
pub side_view_resource: Option<String>,
pub notes: Vec<String>,
}
@ -42,6 +52,8 @@ pub struct EngineTypeLcoInspectionReport {
pub header_magic: Option<u32>,
pub header_magic_hex: Option<String>,
pub internal_stem: Option<String>,
pub companion_stem: Option<String>,
pub body_type_label: Option<String>,
pub early_lanes: Vec<EngineTypeRawLane>,
pub notes: Vec<String>,
}
@ -110,6 +122,10 @@ pub struct EngineTypeFamilyEntry {
pub primary_display_name: Option<String>,
pub content_name: Option<String>,
pub internal_stem: Option<String>,
pub auxiliary_stem: Option<String>,
pub side_view_resource: Option<String>,
pub companion_stem: Option<String>,
pub body_type_label: Option<String>,
pub cct_identifier: Option<String>,
pub cct_value: Option<i64>,
pub has_matched_locomotive_pair: bool,
@ -151,8 +167,18 @@ pub fn inspect_car_bytes(
primary_display_name: read_ascii_field(bytes, CAR_PRIMARY_DISPLAY_NAME_OFFSET),
content_name: read_ascii_field(bytes, CAR_CONTENT_NAME_OFFSET),
internal_stem: read_ascii_field(bytes, CAR_INTERNAL_STEM_OFFSET),
auxiliary_stem: read_ascii_slot(
bytes,
CAR_AUXILIARY_STEM_OFFSET,
CAR_AUXILIARY_STEM_LEN,
),
side_view_resource: read_ascii_slot(
bytes,
CAR_SIDE_VIEW_RESOURCE_OFFSET,
CAR_SIDE_VIEW_RESOURCE_LEN,
),
notes: vec![
"The current .car parser exposes the fixed header fields already grounded by the checked locomotive display census.".to_string(),
"The current .car parser exposes the fixed header strings already grounded by the checked locomotive display census, plus the auxiliary stem slot at 0xa2 and the trailing side-view resource name at 0xc0.".to_string(),
],
})
}
@ -185,9 +211,11 @@ pub fn inspect_lco_bytes(
header_magic: read_u32_le(bytes, 0),
header_magic_hex: read_u32_le(bytes, 0).map(|value| format!("0x{value:08x}")),
internal_stem: read_ascii_field(bytes, LCO_INTERNAL_STEM_OFFSET),
companion_stem: read_lco_companion_stem(bytes),
body_type_label: read_lco_body_type_label(bytes),
early_lanes,
notes: vec![
"The current .lco parser exposes the fixed stem at 0x04 plus the early raw lane block without asserting gameplay semantics for those numeric fields.".to_string(),
"The current .lco parser exposes the fixed-width stem slots at 0x04, 0x0c, and 0x12 plus the early raw lane block without asserting gameplay semantics for those numeric fields.".to_string(),
],
})
}
@ -226,8 +254,14 @@ pub fn inspect_cct_bytes(
bytes: &[u8],
) -> Result<EngineTypeCctInspectionReport, Box<dyn std::error::Error>> {
let text = decode_windows_1252(bytes);
let raw_lines = text.lines().map(|line| line.to_string()).collect::<Vec<_>>();
let first_nonblank = raw_lines.iter().find(|line| !line.trim().is_empty()).cloned();
let raw_lines = text
.lines()
.map(|line| line.to_string())
.collect::<Vec<_>>();
let first_nonblank = raw_lines
.iter()
.find(|line| !line.trim().is_empty())
.cloned();
let (identifier, value) = first_nonblank
.as_deref()
.map(parse_cct_row)
@ -298,7 +332,7 @@ pub fn inspect_engine_types_dir(
let family_entries = families
.values()
.map(|family| build_family_entry(family, &car_reports, &cct_reports))
.map(|family| build_family_entry(family, &car_reports, &lco_reports, &cct_reports))
.collect::<Vec<_>>();
let matched_locomotive_pair_count = family_entries
.iter()
@ -310,10 +344,22 @@ pub fn inspect_engine_types_dir(
Ok(EngineTypesInspectionReport {
source_root: path.display().to_string(),
family_count: family_entries.len(),
car_file_count: family_entries.iter().filter(|entry| entry.car_file.is_some()).count(),
lco_file_count: family_entries.iter().filter(|entry| entry.lco_file.is_some()).count(),
cgo_file_count: family_entries.iter().filter(|entry| entry.cgo_file.is_some()).count(),
cct_file_count: family_entries.iter().filter(|entry| entry.cct_file.is_some()).count(),
car_file_count: family_entries
.iter()
.filter(|entry| entry.car_file.is_some())
.count(),
lco_file_count: family_entries
.iter()
.filter(|entry| entry.lco_file.is_some())
.count(),
cgo_file_count: family_entries
.iter()
.filter(|entry| entry.cgo_file.is_some())
.count(),
cct_file_count: family_entries
.iter()
.filter(|entry| entry.cct_file.is_some())
.count(),
matched_locomotive_pair_count,
unmatched_car_file_count: family_entries
.iter()
@ -325,11 +371,15 @@ pub fn inspect_engine_types_dir(
.count(),
unmatched_cgo_file_count: family_entries
.iter()
.filter(|entry| entry.cgo_file.is_some() && !(entry.car_file.is_some() || entry.lco_file.is_some()))
.filter(|entry| {
entry.cgo_file.is_some() && !(entry.car_file.is_some() || entry.lco_file.is_some())
})
.count(),
unmatched_cct_file_count: family_entries
.iter()
.filter(|entry| entry.cct_file.is_some() && !(entry.car_file.is_some() || entry.lco_file.is_some()))
.filter(|entry| {
entry.cct_file.is_some() && !(entry.car_file.is_some() || entry.lco_file.is_some())
})
.count(),
locomotive_display_census,
families: family_entries,
@ -348,12 +398,17 @@ struct EngineTypeFamilyBuilder {
fn build_family_entry(
family: &EngineTypeFamilyBuilder,
car_reports: &BTreeMap<String, EngineTypeCarInspectionReport>,
lco_reports: &BTreeMap<String, EngineTypeLcoInspectionReport>,
cct_reports: &BTreeMap<String, EngineTypeCctInspectionReport>,
) -> EngineTypeFamilyEntry {
let car_report = family
.car_file
.as_ref()
.and_then(|file_name| car_reports.get(file_name));
let lco_report = family
.lco_file
.as_ref()
.and_then(|file_name| lco_reports.get(file_name));
let cct_report = family
.cct_file
.as_ref()
@ -367,6 +422,10 @@ fn build_family_entry(
primary_display_name: car_report.and_then(|report| report.primary_display_name.clone()),
content_name: car_report.and_then(|report| report.content_name.clone()),
internal_stem: car_report.and_then(|report| report.internal_stem.clone()),
auxiliary_stem: car_report.and_then(|report| report.auxiliary_stem.clone()),
side_view_resource: car_report.and_then(|report| report.side_view_resource.clone()),
companion_stem: lco_report.and_then(|report| report.companion_stem.clone()),
body_type_label: lco_report.and_then(|report| report.body_type_label.clone()),
cct_identifier: cct_report.and_then(|report| report.identifier.clone()),
cct_value: cct_report.and_then(|report| report.value),
has_matched_locomotive_pair: family.car_file.is_some() && family.lco_file.is_some(),
@ -414,7 +473,10 @@ fn build_locomotive_display_census(
.count();
let mut car_header_layout = BTreeMap::new();
car_header_layout.insert("format_version_dword_offset".to_string(), "0x00".to_string());
car_header_layout.insert(
"format_version_dword_offset".to_string(),
"0x00".to_string(),
);
car_header_layout.insert("record_kind_dword_offset".to_string(), "0x04".to_string());
car_header_layout.insert(
"primary_display_name_offset".to_string(),
@ -464,6 +526,37 @@ fn read_ascii_field(bytes: &[u8], offset: usize) -> Option<String> {
(!value.is_empty()).then_some(value)
}
/// Reads a fixed-width ASCII string slot of `len` bytes starting at `offset`.
///
/// The string ends at the first NUL or non-ASCII byte inside the slot, or at
/// the end of the slot when no such byte is present.
///
/// Returns `None` when the slot does not fit inside `bytes` (including when
/// `offset + len` would overflow `usize`) or when the resulting string is
/// empty.
fn read_ascii_slot(bytes: &[u8], offset: usize, len: usize) -> Option<String> {
    // `checked_add` turns an overflowing range into `None` instead of an
    // arithmetic-overflow panic in builds with overflow checks enabled.
    let slot_end = offset.checked_add(len)?;
    let slot = bytes.get(offset..slot_end)?;
    // Only bytes in 0x01..=0x7f are kept; the non-ASCII test also covers the
    // 0xcd byte value, so it needs no separate comparison.
    let text_len = slot
        .iter()
        .position(|&byte| byte == 0 || !byte.is_ascii())
        .unwrap_or(slot.len());
    // Validate the borrowed prefix first so nothing is allocated for slots
    // that turn out to be empty.
    let value = std::str::from_utf8(&slot[..text_len]).ok()?;
    (!value.is_empty()).then(|| value.to_owned())
}
/// Reports whether the `len`-byte slot at `offset` contains at least one NUL byte.
///
/// Returns `false` when the slot does not fit inside `bytes`, including when
/// `offset + len` would overflow `usize`.
fn slot_is_padded(bytes: &[u8], offset: usize, len: usize) -> bool {
    offset
        // An overflowing end index is treated like any other out-of-range
        // slot rather than panicking in builds with overflow checks enabled.
        .checked_add(len)
        .and_then(|slot_end| bytes.get(offset..slot_end))
        .map_or(false, |slot| slot.contains(&0))
}
/// Reads the companion stem slot of an `.lco` header.
///
/// The slot at `LCO_COMPANION_STEM_OFFSET` is only consulted when the 8-byte
/// slot at `LCO_INTERNAL_STEM_OFFSET` contains a NUL byte. Returns `None` when
/// that gate fails, when either slot lies outside `bytes`, or when the
/// companion slot holds no text.
fn read_lco_companion_stem(bytes: &[u8]) -> Option<String> {
    // A stem slot without any NUL means the internal stem text runs on into
    // the bytes that follow, so they must not be read as a separate stem.
    if !slot_is_padded(bytes, LCO_INTERNAL_STEM_OFFSET, 0x08) {
        return None;
    }
    read_ascii_slot(bytes, LCO_COMPANION_STEM_OFFSET, LCO_COMPANION_STEM_LEN)
}
/// Reads the body type label slot of an `.lco` header.
///
/// The slot at `LCO_BODY_TYPE_LABEL_OFFSET` is only consulted when the
/// companion stem slot contains a NUL byte. Returns `None` when that gate
/// fails, when either slot lies outside `bytes`, or when the label slot holds
/// no text.
fn read_lco_body_type_label(bytes: &[u8]) -> Option<String> {
    // NOTE(review): only the companion slot is checked here, not the internal
    // stem slot — confirm that is the intended gate for this field.
    if slot_is_padded(bytes, LCO_COMPANION_STEM_OFFSET, LCO_COMPANION_STEM_LEN) {
        read_ascii_slot(bytes, LCO_BODY_TYPE_LABEL_OFFSET, LCO_BODY_TYPE_LABEL_LEN)
    } else {
        None
    }
}
fn parse_cct_row(line: &str) -> (Option<String>, Option<i64>) {
let mut parts = line.split_whitespace();
let identifier = parts.next().map(|value| value.to_string());
@ -472,7 +565,10 @@ fn parse_cct_row(line: &str) -> (Option<String>, Option<i64>) {
}
fn decode_windows_1252(bytes: &[u8]) -> String {
bytes.iter().map(|byte| decode_windows_1252_byte(*byte)).collect()
bytes
.iter()
.map(|byte| decode_windows_1252_byte(*byte))
.collect()
}
fn decode_windows_1252_byte(byte: u8) -> char {
@ -514,36 +610,57 @@ mod tests {
#[test]
fn parses_car_header_fields() {
let mut bytes = vec![0u8; 0x90];
let mut bytes = vec![0u8; 0xe0];
bytes[0..4].copy_from_slice(&0x03eau32.to_le_bytes());
bytes[4..8].copy_from_slice(&2u32.to_le_bytes());
bytes[0x0c..0x0c + 6].copy_from_slice(b"2-D-2\0");
bytes[0x48..0x48 + 5].copy_from_slice(b"2D2L\0");
bytes[0x84..0x84 + 5].copy_from_slice(b"2D2L\0");
bytes[0xa2..0xa2 + 5].copy_from_slice(b"2D2L\0");
bytes[0xc0..0xc0 + 18].copy_from_slice(b"CarSideView_2.imb\0");
let report = inspect_car_bytes(&bytes).expect("car should parse");
assert_eq!(report.header_magic, Some(0x03ea));
assert_eq!(report.primary_display_name.as_deref(), Some("2-D-2"));
assert_eq!(report.internal_stem.as_deref(), Some("2D2L"));
assert_eq!(report.auxiliary_stem.as_deref(), Some("2D2L"));
assert_eq!(
report.side_view_resource.as_deref(),
Some("CarSideView_2.imb")
);
}
#[test]
fn parses_lco_header_and_lanes() {
fn parses_lco_header_slots_and_lanes() {
let mut bytes = vec![0u8; 0x58];
bytes[0..4].copy_from_slice(&0x07d5u32.to_le_bytes());
bytes[4..4 + 5].copy_from_slice(b"2D2L\0");
bytes[4..4 + 5].copy_from_slice(b"GP7L\0");
bytes[0x0c..0x0c + 6].copy_from_slice(b"VL80T\0");
bytes[0x12..0x12 + 5].copy_from_slice(b"Loco\0");
bytes[0x20..0x24].copy_from_slice(&100u32.to_le_bytes());
let report = inspect_lco_bytes(&bytes).expect("lco should parse");
assert_eq!(report.header_magic, Some(0x07d5));
assert_eq!(report.internal_stem.as_deref(), Some("2D2L"));
assert_eq!(report.internal_stem.as_deref(), Some("GP7L"));
assert_eq!(report.companion_stem.as_deref(), Some("VL80T"));
assert_eq!(report.body_type_label.as_deref(), Some("Loco"));
assert_eq!(report.early_lanes[0].raw_u32, 100);
}
#[test]
fn does_not_misclassify_long_lco_stems_as_companion_slots() {
    // Nine stem bytes written at 0x04 run past 0x0c, where the companion
    // stem slot would otherwise begin.
    let long_stem = b"AtlanticL";
    let mut bytes = vec![0u8; 0x20];
    bytes[4..4 + long_stem.len()].copy_from_slice(long_stem);

    let report = inspect_lco_bytes(&bytes).expect("lco should parse");

    // The stem is reported whole and neither follow-on slot is populated.
    assert_eq!(report.internal_stem.as_deref(), Some("AtlanticL"));
    assert!(report.companion_stem.is_none());
    assert!(report.body_type_label.is_none());
}
#[test]
fn parses_cgo_and_cct_files() {
let cgo = inspect_cgo_bytes(b"\x00\x00\\BAuto_Carrier\0")
.expect("cgo should parse");
let cgo = inspect_cgo_bytes(b"\x00\x00\\BAuto_Carrier\0").expect("cgo should parse");
assert_eq!(cgo.content_stem.as_deref(), Some("Auto_Carrier"));
let cct = inspect_cct_bytes(b"Auto_Carrier 13\n").expect("cct should parse");
@ -551,6 +668,67 @@ mod tests {
assert_eq!(cct.value, Some(13));
}
#[test]
fn builds_family_entry_with_extended_car_and_lco_slots() {
    // Each fixture report is keyed by the file name the family builder
    // refers to, so `build_family_entry` can look all three up.
    let mut car_reports = BTreeMap::new();
    car_reports.insert(
        String::from("GP7.car"),
        EngineTypeCarInspectionReport {
            file_size: 0,
            header_magic: None,
            header_magic_hex: None,
            record_kind: None,
            record_kind_hex: None,
            primary_display_name: Some(String::from("GP7")),
            content_name: Some(String::from("GP7")),
            internal_stem: Some(String::from("GP7L")),
            auxiliary_stem: Some(String::from("GP7L")),
            side_view_resource: Some(String::from("CarSideView_1.imb")),
            notes: Vec::new(),
        },
    );

    let mut lco_reports = BTreeMap::new();
    lco_reports.insert(
        String::from("GP7.lco"),
        EngineTypeLcoInspectionReport {
            file_size: 0,
            header_magic: None,
            header_magic_hex: None,
            internal_stem: Some(String::from("GP7L")),
            companion_stem: Some(String::from("VL80T")),
            body_type_label: Some(String::from("Loco")),
            early_lanes: Vec::new(),
            notes: Vec::new(),
        },
    );

    let mut cct_reports = BTreeMap::new();
    cct_reports.insert(
        String::from("GP7.cct"),
        EngineTypeCctInspectionReport {
            file_size: 0,
            line_count: 1,
            identifier: Some(String::from("GP7")),
            value: Some(13),
            raw_lines: vec![String::from("GP7 13")],
            notes: Vec::new(),
        },
    );

    let family = EngineTypeFamilyBuilder {
        canonical_stem: String::from("gp7"),
        car_file: Some(String::from("GP7.car")),
        lco_file: Some(String::from("GP7.lco")),
        cgo_file: Some(String::from("GP7.cgo")),
        cct_file: Some(String::from("GP7.cct")),
    };

    let entry = build_family_entry(&family, &car_reports, &lco_reports, &cct_reports);

    // Fields taken from the .car report.
    assert_eq!(entry.auxiliary_stem.as_deref(), Some("GP7L"));
    assert_eq!(
        entry.side_view_resource.as_deref(),
        Some("CarSideView_1.imb")
    );
    // Fields taken from the .lco report.
    assert_eq!(entry.companion_stem.as_deref(), Some("VL80T"));
    assert_eq!(entry.body_type_label.as_deref(), Some("Loco"));
    // Field taken from the .cct report.
    assert_eq!(entry.cct_identifier.as_deref(), Some("GP7"));
}
#[test]
fn builds_locomotive_display_census() {
let mut car_reports = BTreeMap::new();
@ -565,6 +743,8 @@ mod tests {
primary_display_name: Some("2-D-2".to_string()),
content_name: Some("2D2L".to_string()),
internal_stem: Some("2D2L".to_string()),
auxiliary_stem: Some("2D2L".to_string()),
side_view_resource: Some("CarSideView_2.imb".to_string()),
notes: Vec::new(),
},
);
@ -577,6 +757,10 @@ mod tests {
primary_display_name: Some("2-D-2".to_string()),
content_name: Some("2D2L".to_string()),
internal_stem: Some("2D2L".to_string()),
auxiliary_stem: Some("2D2L".to_string()),
side_view_resource: Some("CarSideView_2.imb".to_string()),
companion_stem: None,
body_type_label: None,
cct_identifier: None,
cct_value: None,
has_matched_locomotive_pair: true,

View file

@ -93,7 +93,10 @@ fn parse_f64_tokens(tokens: &[String]) -> Option<Vec<f64>> {
}
fn decode_windows_1252(bytes: &[u8]) -> String {
bytes.iter().map(|byte| decode_windows_1252_byte(*byte)).collect()
bytes
.iter()
.map(|byte| decode_windows_1252_byte(*byte))
.collect()
}
fn decode_windows_1252_byte(byte: u8) -> char {
@ -135,10 +138,8 @@ mod tests {
#[test]
fn parses_scalar_and_tuple_lines() {
let report = inspect_imb_bytes(
b"TGAName ICE_Profile\nTGAWidth 256\nImageWH 0 0 138 32\n",
)
.expect("imb should parse");
let report = inspect_imb_bytes(b"TGAName ICE_Profile\nTGAWidth 256\nImageWH 0 0 138 32\n")
.expect("imb should parse");
assert_eq!(report.entry_count, 3);
assert_eq!(report.entries[0].key, "TGAName");

View file

@ -97,7 +97,10 @@ pub fn inspect_lng_bytes(bytes: &[u8]) -> Result<LngInspectionReport, Box<dyn st
.iter()
.map(|entry| entry.kind.as_str())
.collect::<BTreeSet<_>>();
let format_family = match (format_kinds.contains("string"), format_kinds.contains("styled")) {
let format_family = match (
format_kinds.contains("string"),
format_kinds.contains("styled"),
) {
(true, false) => "quoted-string-table".to_string(),
(false, true) => "styled-credits-lines".to_string(),
(true, true) => "mixed-language-table".to_string(),
@ -189,7 +192,10 @@ fn normalize_lng_text(text: &str) -> String {
}
fn decode_windows_1252(bytes: &[u8]) -> String {
bytes.iter().map(|byte| decode_windows_1252_byte(*byte)).collect()
bytes
.iter()
.map(|byte| decode_windows_1252_byte(*byte))
.collect()
}
fn decode_windows_1252_byte(byte: u8) -> char {