Add parsers for RT3 language and engine type assets
This commit is contained in:
parent
8ebced08c0
commit
61472bf72d
17 changed files with 32835 additions and 9 deletions
|
|
@ -1,6 +1,9 @@
|
|||
pub mod building;
|
||||
pub mod campaign;
|
||||
pub mod cargo;
|
||||
pub mod engine_types;
|
||||
pub mod imb;
|
||||
pub mod lng;
|
||||
pub mod pk4;
|
||||
pub mod smp;
|
||||
pub mod win;
|
||||
|
|
|
|||
592
crates/rrt-runtime/src/inspect/engine_types.rs
Normal file
592
crates/rrt-runtime/src/inspect/engine_types.rs
Normal file
|
|
@ -0,0 +1,592 @@
|
|||
use std::collections::BTreeMap;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
// Byte offsets of the fixed-position ASCII fields observed in a .car header.
const CAR_PRIMARY_DISPLAY_NAME_OFFSET: usize = 0x0c;
const CAR_CONTENT_NAME_OFFSET: usize = 0x48;
const CAR_INTERNAL_STEM_OFFSET: usize = 0x84;
// Byte offset of the internal stem string in a .lco header.
const LCO_INTERNAL_STEM_OFFSET: usize = 0x04;
// Display names of shipped locomotive pairs that do not appear in the grounded
// descriptor name prefix (see the census notes in this module).
const UNMATCHED_LOCOMOTIVE_DISPLAY_NAMES: [&str; 5] =
    ["242 A1", "Class 460", "Class A1", "Class P8", "Class QJ"];
// Offsets of the early raw dword lanes sampled from a .lco file (semantics of
// these numeric fields are intentionally not asserted yet).
const LCO_EARLY_LANE_OFFSETS: [usize; 14] = [
    0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c, 0x40, 0x44, 0x48, 0x4c, 0x50, 0x54,
];
|
||||
|
||||
/// Parsed view of a single `.car` engine-type file's fixed header fields.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EngineTypeCarInspectionReport {
    pub file_size: usize,
    /// First little-endian dword of the file, when at least 4 bytes exist.
    pub header_magic: Option<u32>,
    pub header_magic_hex: Option<String>,
    /// Second dword (offset 0x04).
    pub record_kind: Option<u32>,
    pub record_kind_hex: Option<String>,
    /// ASCII field at 0x0c — the user-facing locomotive display name.
    pub primary_display_name: Option<String>,
    /// ASCII field at 0x48.
    pub content_name: Option<String>,
    /// ASCII field at 0x84.
    pub internal_stem: Option<String>,
    pub notes: Vec<String>,
}
|
||||
|
||||
/// One sampled dword from a fixed offset, exposed both as the raw u32 and as
/// the f32 reinterpretation of the same bits (no Eq: contains an f32).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct EngineTypeRawLane {
    pub offset: usize,
    pub offset_hex: String,
    pub raw_u32: u32,
    pub raw_u32_hex: String,
    /// Bit-pattern reinterpretation of `raw_u32`, not a converted value.
    pub raw_f32: f32,
}
|
||||
|
||||
/// Parsed view of a single `.lco` engine-type file.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct EngineTypeLcoInspectionReport {
    pub file_size: usize,
    pub header_magic: Option<u32>,
    pub header_magic_hex: Option<String>,
    /// ASCII field at 0x04.
    pub internal_stem: Option<String>,
    /// Raw dwords sampled at LCO_EARLY_LANE_OFFSETS; lanes past EOF are omitted.
    pub early_lanes: Vec<EngineTypeRawLane>,
    pub notes: Vec<String>,
}
|
||||
|
||||
/// Parsed view of a single `.cgo` engine-type file (deliberately minimal).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct EngineTypeCgoInspectionReport {
    pub file_size: usize,
    pub leading_u32: Option<u32>,
    pub leading_u32_hex: Option<String>,
    /// f32 reinterpretation of the leading dword's bits.
    pub leading_f32: Option<f32>,
    /// ASCII field starting at offset 0x04.
    pub content_stem: Option<String>,
    pub notes: Vec<String>,
}
|
||||
|
||||
/// Parsed view of a single `.cct` engine-type text file.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EngineTypeCctInspectionReport {
    pub file_size: usize,
    pub line_count: usize,
    /// First whitespace token of the first non-blank line, if any.
    pub identifier: Option<String>,
    /// Second token of that line, when it parses as an integer.
    pub value: Option<i64>,
    pub raw_lines: Vec<String>,
    pub notes: Vec<String>,
}
|
||||
|
||||
/// One row of the locomotive display census: a matched .car/.lco pair.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EngineTypeLocomotiveDisplayEntry {
    pub car_file: String,
    pub lco_file: String,
    pub primary_display_name: String,
    pub content_name: String,
    pub internal_stem: String,
    /// False when the display name is in UNMATCHED_LOCOMOTIVE_DISPLAY_NAMES.
    pub matches_grounded_prefix_name: bool,
}
|
||||
|
||||
/// A census row whose display name is NOT in the grounded prefix set;
/// the same fields as EngineTypeLocomotiveDisplayEntry minus the match flag.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EngineTypeLocomotiveDisplayFamily {
    pub car_file: String,
    pub lco_file: String,
    pub primary_display_name: String,
    pub content_name: String,
    pub internal_stem: String,
}
|
||||
|
||||
/// Aggregated census of every matched .car/.lco locomotive pair in a scan.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EngineTypeLocomotiveDisplayCensusReport {
    /// Export schema version of this report shape (currently 1).
    pub format_version: u32,
    pub semantic_family: String,
    pub source_root: String,
    /// Human-readable map of known .car header offsets (field name -> hex).
    pub car_header_layout: BTreeMap<String, String>,
    pub observed_locomotive_pair_count: usize,
    pub grounded_prefix_count: usize,
    /// Currently always equal to grounded_prefix_count.
    pub grounded_prefix_match_count: usize,
    pub unmatched_display_family_count: usize,
    pub unmatched_display_families: Vec<EngineTypeLocomotiveDisplayFamily>,
    /// All census rows, sorted by car_file.
    pub entries: Vec<EngineTypeLocomotiveDisplayEntry>,
    pub notes: Vec<String>,
}
|
||||
|
||||
/// One engine-type "family": all sibling files that share a stem
/// (case-insensitive) under the scanned directory.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EngineTypeFamilyEntry {
    /// Lower-cased file stem used as the grouping key.
    pub canonical_stem: String,
    pub car_file: Option<String>,
    pub lco_file: Option<String>,
    pub cgo_file: Option<String>,
    pub cct_file: Option<String>,
    // The next three fields are copied from the .car report, when present.
    pub primary_display_name: Option<String>,
    pub content_name: Option<String>,
    pub internal_stem: Option<String>,
    // The next two fields are copied from the .cct report, when present.
    pub cct_identifier: Option<String>,
    pub cct_value: Option<i64>,
    /// True when both a .car and a .lco file exist for this stem.
    pub has_matched_locomotive_pair: bool,
}
|
||||
|
||||
/// Top-level report for a whole EngineTypes directory scan.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EngineTypesInspectionReport {
    pub source_root: String,
    pub family_count: usize,
    pub car_file_count: usize,
    pub lco_file_count: usize,
    pub cgo_file_count: usize,
    pub cct_file_count: usize,
    /// Families with both a .car and a .lco file.
    pub matched_locomotive_pair_count: usize,
    /// .car without sibling .lco, and vice versa.
    pub unmatched_car_file_count: usize,
    pub unmatched_lco_file_count: usize,
    /// .cgo/.cct files whose family has neither a .car nor a .lco sibling.
    pub unmatched_cgo_file_count: usize,
    pub unmatched_cct_file_count: usize,
    pub locomotive_display_census: EngineTypeLocomotiveDisplayCensusReport,
    pub families: Vec<EngineTypeFamilyEntry>,
}
|
||||
|
||||
pub fn inspect_car_file(
|
||||
path: &Path,
|
||||
) -> Result<EngineTypeCarInspectionReport, Box<dyn std::error::Error>> {
|
||||
let bytes = fs::read(path)?;
|
||||
inspect_car_bytes(&bytes)
|
||||
}
|
||||
|
||||
pub fn inspect_car_bytes(
|
||||
bytes: &[u8],
|
||||
) -> Result<EngineTypeCarInspectionReport, Box<dyn std::error::Error>> {
|
||||
Ok(EngineTypeCarInspectionReport {
|
||||
file_size: bytes.len(),
|
||||
header_magic: read_u32_le(bytes, 0),
|
||||
header_magic_hex: read_u32_le(bytes, 0).map(|value| format!("0x{value:08x}")),
|
||||
record_kind: read_u32_le(bytes, 4),
|
||||
record_kind_hex: read_u32_le(bytes, 4).map(|value| format!("0x{value:08x}")),
|
||||
primary_display_name: read_ascii_field(bytes, CAR_PRIMARY_DISPLAY_NAME_OFFSET),
|
||||
content_name: read_ascii_field(bytes, CAR_CONTENT_NAME_OFFSET),
|
||||
internal_stem: read_ascii_field(bytes, CAR_INTERNAL_STEM_OFFSET),
|
||||
notes: vec![
|
||||
"The current .car parser exposes the fixed header fields already grounded by the checked locomotive display census.".to_string(),
|
||||
],
|
||||
})
|
||||
}
|
||||
|
||||
pub fn inspect_lco_file(
|
||||
path: &Path,
|
||||
) -> Result<EngineTypeLcoInspectionReport, Box<dyn std::error::Error>> {
|
||||
let bytes = fs::read(path)?;
|
||||
inspect_lco_bytes(&bytes)
|
||||
}
|
||||
|
||||
pub fn inspect_lco_bytes(
|
||||
bytes: &[u8],
|
||||
) -> Result<EngineTypeLcoInspectionReport, Box<dyn std::error::Error>> {
|
||||
let early_lanes = LCO_EARLY_LANE_OFFSETS
|
||||
.iter()
|
||||
.filter_map(|offset| {
|
||||
let raw_u32 = read_u32_le(bytes, *offset)?;
|
||||
Some(EngineTypeRawLane {
|
||||
offset: *offset,
|
||||
offset_hex: format!("0x{offset:04x}"),
|
||||
raw_u32,
|
||||
raw_u32_hex: format!("0x{raw_u32:08x}"),
|
||||
raw_f32: f32::from_bits(raw_u32),
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
Ok(EngineTypeLcoInspectionReport {
|
||||
file_size: bytes.len(),
|
||||
header_magic: read_u32_le(bytes, 0),
|
||||
header_magic_hex: read_u32_le(bytes, 0).map(|value| format!("0x{value:08x}")),
|
||||
internal_stem: read_ascii_field(bytes, LCO_INTERNAL_STEM_OFFSET),
|
||||
early_lanes,
|
||||
notes: vec![
|
||||
"The current .lco parser exposes the fixed stem at 0x04 plus the early raw lane block without asserting gameplay semantics for those numeric fields.".to_string(),
|
||||
],
|
||||
})
|
||||
}
|
||||
|
||||
pub fn inspect_cgo_file(
|
||||
path: &Path,
|
||||
) -> Result<EngineTypeCgoInspectionReport, Box<dyn std::error::Error>> {
|
||||
let bytes = fs::read(path)?;
|
||||
inspect_cgo_bytes(&bytes)
|
||||
}
|
||||
|
||||
pub fn inspect_cgo_bytes(
|
||||
bytes: &[u8],
|
||||
) -> Result<EngineTypeCgoInspectionReport, Box<dyn std::error::Error>> {
|
||||
let leading_u32 = read_u32_le(bytes, 0);
|
||||
Ok(EngineTypeCgoInspectionReport {
|
||||
file_size: bytes.len(),
|
||||
leading_u32,
|
||||
leading_u32_hex: leading_u32.map(|value| format!("0x{value:08x}")),
|
||||
leading_f32: leading_u32.map(f32::from_bits),
|
||||
content_stem: read_ascii_field(bytes, 4),
|
||||
notes: vec![
|
||||
"The current .cgo parser is intentionally conservative: it exposes the leading scalar lane plus the inline content stem without overclaiming the remaining payload layout.".to_string(),
|
||||
],
|
||||
})
|
||||
}
|
||||
|
||||
pub fn inspect_cct_file(
|
||||
path: &Path,
|
||||
) -> Result<EngineTypeCctInspectionReport, Box<dyn std::error::Error>> {
|
||||
let bytes = fs::read(path)?;
|
||||
inspect_cct_bytes(&bytes)
|
||||
}
|
||||
|
||||
pub fn inspect_cct_bytes(
|
||||
bytes: &[u8],
|
||||
) -> Result<EngineTypeCctInspectionReport, Box<dyn std::error::Error>> {
|
||||
let text = decode_windows_1252(bytes);
|
||||
let raw_lines = text.lines().map(|line| line.to_string()).collect::<Vec<_>>();
|
||||
let first_nonblank = raw_lines.iter().find(|line| !line.trim().is_empty()).cloned();
|
||||
let (identifier, value) = first_nonblank
|
||||
.as_deref()
|
||||
.map(parse_cct_row)
|
||||
.unwrap_or((None, None));
|
||||
Ok(EngineTypeCctInspectionReport {
|
||||
file_size: bytes.len(),
|
||||
line_count: raw_lines.len(),
|
||||
identifier,
|
||||
value,
|
||||
raw_lines,
|
||||
notes: vec![
|
||||
"The current .cct parser preserves the first observed identifier/value row and the raw text lines without claiming wider semantics yet.".to_string(),
|
||||
],
|
||||
})
|
||||
}
|
||||
|
||||
pub fn inspect_engine_types_dir(
|
||||
path: &Path,
|
||||
) -> Result<EngineTypesInspectionReport, Box<dyn std::error::Error>> {
|
||||
let mut families = BTreeMap::<String, EngineTypeFamilyBuilder>::new();
|
||||
let mut car_reports = BTreeMap::<String, EngineTypeCarInspectionReport>::new();
|
||||
let mut lco_reports = BTreeMap::<String, EngineTypeLcoInspectionReport>::new();
|
||||
let mut cgo_reports = BTreeMap::<String, EngineTypeCgoInspectionReport>::new();
|
||||
let mut cct_reports = BTreeMap::<String, EngineTypeCctInspectionReport>::new();
|
||||
|
||||
for entry in fs::read_dir(path)? {
|
||||
let entry = entry?;
|
||||
if !entry.file_type()?.is_file() {
|
||||
continue;
|
||||
}
|
||||
let file_name = entry.file_name().to_string_lossy().into_owned();
|
||||
let Some(stem) = Path::new(&file_name)
|
||||
.file_stem()
|
||||
.and_then(|stem| stem.to_str())
|
||||
.map(|stem| stem.to_string())
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
let Some(extension) = Path::new(&file_name)
|
||||
.extension()
|
||||
.and_then(|ext| ext.to_str())
|
||||
.map(|ext| ext.to_ascii_lowercase())
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
let family = families.entry(stem.to_ascii_lowercase()).or_default();
|
||||
family.canonical_stem = stem.to_ascii_lowercase();
|
||||
match extension.as_str() {
|
||||
"car" => {
|
||||
family.car_file = Some(file_name.clone());
|
||||
car_reports.insert(file_name.clone(), inspect_car_file(&entry.path())?);
|
||||
}
|
||||
"lco" => {
|
||||
family.lco_file = Some(file_name.clone());
|
||||
lco_reports.insert(file_name.clone(), inspect_lco_file(&entry.path())?);
|
||||
}
|
||||
"cgo" => {
|
||||
family.cgo_file = Some(file_name.clone());
|
||||
cgo_reports.insert(file_name.clone(), inspect_cgo_file(&entry.path())?);
|
||||
}
|
||||
"cct" => {
|
||||
family.cct_file = Some(file_name.clone());
|
||||
cct_reports.insert(file_name.clone(), inspect_cct_file(&entry.path())?);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
let family_entries = families
|
||||
.values()
|
||||
.map(|family| build_family_entry(family, &car_reports, &cct_reports))
|
||||
.collect::<Vec<_>>();
|
||||
let matched_locomotive_pair_count = family_entries
|
||||
.iter()
|
||||
.filter(|family| family.has_matched_locomotive_pair)
|
||||
.count();
|
||||
let locomotive_display_census =
|
||||
build_locomotive_display_census(path, &family_entries, &car_reports)?;
|
||||
|
||||
Ok(EngineTypesInspectionReport {
|
||||
source_root: path.display().to_string(),
|
||||
family_count: family_entries.len(),
|
||||
car_file_count: family_entries.iter().filter(|entry| entry.car_file.is_some()).count(),
|
||||
lco_file_count: family_entries.iter().filter(|entry| entry.lco_file.is_some()).count(),
|
||||
cgo_file_count: family_entries.iter().filter(|entry| entry.cgo_file.is_some()).count(),
|
||||
cct_file_count: family_entries.iter().filter(|entry| entry.cct_file.is_some()).count(),
|
||||
matched_locomotive_pair_count,
|
||||
unmatched_car_file_count: family_entries
|
||||
.iter()
|
||||
.filter(|entry| entry.car_file.is_some() && entry.lco_file.is_none())
|
||||
.count(),
|
||||
unmatched_lco_file_count: family_entries
|
||||
.iter()
|
||||
.filter(|entry| entry.car_file.is_none() && entry.lco_file.is_some())
|
||||
.count(),
|
||||
unmatched_cgo_file_count: family_entries
|
||||
.iter()
|
||||
.filter(|entry| entry.cgo_file.is_some() && !(entry.car_file.is_some() || entry.lco_file.is_some()))
|
||||
.count(),
|
||||
unmatched_cct_file_count: family_entries
|
||||
.iter()
|
||||
.filter(|entry| entry.cct_file.is_some() && !(entry.car_file.is_some() || entry.lco_file.is_some()))
|
||||
.count(),
|
||||
locomotive_display_census,
|
||||
families: family_entries,
|
||||
})
|
||||
}
|
||||
|
||||
/// Mutable accumulator for one stem-keyed family while the directory scan
/// is in progress; flattened into EngineTypeFamilyEntry afterwards.
#[derive(Default)]
struct EngineTypeFamilyBuilder {
    canonical_stem: String,
    car_file: Option<String>,
    lco_file: Option<String>,
    cgo_file: Option<String>,
    cct_file: Option<String>,
}
|
||||
|
||||
fn build_family_entry(
|
||||
family: &EngineTypeFamilyBuilder,
|
||||
car_reports: &BTreeMap<String, EngineTypeCarInspectionReport>,
|
||||
cct_reports: &BTreeMap<String, EngineTypeCctInspectionReport>,
|
||||
) -> EngineTypeFamilyEntry {
|
||||
let car_report = family
|
||||
.car_file
|
||||
.as_ref()
|
||||
.and_then(|file_name| car_reports.get(file_name));
|
||||
let cct_report = family
|
||||
.cct_file
|
||||
.as_ref()
|
||||
.and_then(|file_name| cct_reports.get(file_name));
|
||||
EngineTypeFamilyEntry {
|
||||
canonical_stem: family.canonical_stem.clone(),
|
||||
car_file: family.car_file.clone(),
|
||||
lco_file: family.lco_file.clone(),
|
||||
cgo_file: family.cgo_file.clone(),
|
||||
cct_file: family.cct_file.clone(),
|
||||
primary_display_name: car_report.and_then(|report| report.primary_display_name.clone()),
|
||||
content_name: car_report.and_then(|report| report.content_name.clone()),
|
||||
internal_stem: car_report.and_then(|report| report.internal_stem.clone()),
|
||||
cct_identifier: cct_report.and_then(|report| report.identifier.clone()),
|
||||
cct_value: cct_report.and_then(|report| report.value),
|
||||
has_matched_locomotive_pair: family.car_file.is_some() && family.lco_file.is_some(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the locomotive display census from every family that has BOTH a
/// .car and a .lco file, using the already-parsed .car reports.
///
/// # Errors
/// Currently infallible; the `Result` mirrors the other builders.
fn build_locomotive_display_census(
    path: &Path,
    families: &[EngineTypeFamilyEntry],
    car_reports: &BTreeMap<String, EngineTypeCarInspectionReport>,
) -> Result<EngineTypeLocomotiveDisplayCensusReport, Box<dyn std::error::Error>> {
    // Only fully-paired families with a retrievable .car report become rows.
    let mut entries = families
        .iter()
        .filter_map(|family| {
            let car_file = family.car_file.clone()?;
            let lco_file = family.lco_file.clone()?;
            let car_report = car_reports.get(&car_file)?;
            Some(EngineTypeLocomotiveDisplayEntry {
                car_file: car_file.clone(),
                lco_file,
                primary_display_name: car_report.primary_display_name.clone().unwrap_or_default(),
                content_name: car_report.content_name.clone().unwrap_or_default(),
                internal_stem: car_report.internal_stem.clone().unwrap_or_default(),
                // A missing display name (empty string) counts as "matched"
                // because it is not in the unmatched-name list.
                matches_grounded_prefix_name: !UNMATCHED_LOCOMOTIVE_DISPLAY_NAMES
                    .contains(&car_report.primary_display_name.as_deref().unwrap_or("")),
            })
        })
        .collect::<Vec<_>>();
    // Deterministic export order: sort rows by their .car file name.
    entries.sort_by(|left, right| left.car_file.cmp(&right.car_file));

    let unmatched_display_families = entries
        .iter()
        .filter(|entry| !entry.matches_grounded_prefix_name)
        .map(|entry| EngineTypeLocomotiveDisplayFamily {
            car_file: entry.car_file.clone(),
            lco_file: entry.lco_file.clone(),
            primary_display_name: entry.primary_display_name.clone(),
            content_name: entry.content_name.clone(),
            internal_stem: entry.internal_stem.clone(),
        })
        .collect::<Vec<_>>();
    let grounded_prefix_count = entries
        .iter()
        .filter(|entry| entry.matches_grounded_prefix_name)
        .count();

    // Human-readable description of the known .car header offsets.
    let mut car_header_layout = BTreeMap::new();
    car_header_layout.insert("format_version_dword_offset".to_string(), "0x00".to_string());
    car_header_layout.insert("record_kind_dword_offset".to_string(), "0x04".to_string());
    car_header_layout.insert(
        "primary_display_name_offset".to_string(),
        format!("0x{CAR_PRIMARY_DISPLAY_NAME_OFFSET:02x}"),
    );
    car_header_layout.insert(
        "content_name_offset".to_string(),
        format!("0x{CAR_CONTENT_NAME_OFFSET:02x}"),
    );
    car_header_layout.insert(
        "internal_stem_offset".to_string(),
        format!("0x{CAR_INTERNAL_STEM_OFFSET:02x}"),
    );

    Ok(EngineTypeLocomotiveDisplayCensusReport {
        format_version: 1,
        semantic_family: "engine-type-locomotive-display-census".to_string(),
        source_root: path.display().to_string(),
        car_header_layout,
        observed_locomotive_pair_count: entries.len(),
        grounded_prefix_count,
        // NOTE: currently identical to grounded_prefix_count by construction.
        grounded_prefix_match_count: grounded_prefix_count,
        unmatched_display_family_count: unmatched_display_families.len(),
        unmatched_display_families,
        entries,
        notes: vec![
            "Each row comes from one shipped .car/.lco locomotive engine-type pair under Data/EngineTypes.".to_string(),
            "The primary display string is parsed directly from the .car header at 0x0c rather than inferred from strings output.".to_string(),
            "The five unmatched display families are shipped named locomotive assets whose names do not appear in the current 61-name grounded descriptor prefix.".to_string(),
            "This export grounds the extra shipped locomotive-name cohort, but it does not by itself prove where those names land in the live ordinal catalog or descriptor bands.".to_string(),
        ],
    })
}
|
||||
|
||||
/// Reads a little-endian u32 at `offset`, returning `None` when the slice is
/// too short or the end-offset computation would overflow `usize` (the
/// original `offset + 4` could panic in debug builds on overflow).
fn read_u32_le(bytes: &[u8], offset: usize) -> Option<u32> {
    let end = offset.checked_add(4)?;
    let slice = bytes.get(offset..end)?;
    Some(u32::from_le_bytes(slice.try_into().ok()?))
}
|
||||
|
||||
/// Reads a fixed-position ASCII string starting at `offset`, terminated by
/// the first NUL or non-ASCII byte.
///
/// Returns `None` when the offset is past the end of `bytes` or the field is
/// empty. (The original also compared against 0xcd explicitly — presumably a
/// fill-pattern guard — but 0xcd is non-ASCII, so that comparison was
/// redundant and has been removed.)
fn read_ascii_field(bytes: &[u8], offset: usize) -> Option<String> {
    let tail = bytes.get(offset..)?;
    let end = tail
        .iter()
        .position(|byte| *byte == 0 || !byte.is_ascii())
        .unwrap_or(tail.len());
    // Every retained byte is ASCII, so this UTF-8 conversion cannot fail in
    // practice; keep the defensive `.ok()?` anyway.
    let value = String::from_utf8(tail[..end].to_vec()).ok()?;
    (!value.is_empty()).then_some(value)
}
|
||||
|
||||
/// Splits one .cct text row into its leading identifier token and, when the
/// second token parses as an integer, its value. Extra tokens are ignored.
fn parse_cct_row(line: &str) -> (Option<String>, Option<i64>) {
    let mut tokens = line.split_whitespace();
    let identifier = tokens.next().map(str::to_string);
    let value = tokens.next().and_then(|token| token.parse::<i64>().ok());
    (identifier, value)
}
|
||||
|
||||
fn decode_windows_1252(bytes: &[u8]) -> String {
|
||||
bytes.iter().map(|byte| decode_windows_1252_byte(*byte)).collect()
|
||||
}
|
||||
|
||||
/// Maps one Windows-1252 byte to its Unicode scalar value.
///
/// Bytes in the 0x80..=0x9F "specials" range use the CP1252 mapping table;
/// every other byte maps directly (the Latin-1 overlap). Code points that
/// CP1252 leaves undefined (0x81, 0x8D, 0x8F, 0x90, 0x9D) fall through to the
/// direct mapping, matching the original behavior.
fn decode_windows_1252_byte(byte: u8) -> char {
    const SPECIALS: [(u8, char); 27] = [
        (0x80, '\u{20AC}'),
        (0x82, '\u{201A}'),
        (0x83, '\u{0192}'),
        (0x84, '\u{201E}'),
        (0x85, '\u{2026}'),
        (0x86, '\u{2020}'),
        (0x87, '\u{2021}'),
        (0x88, '\u{02C6}'),
        (0x89, '\u{2030}'),
        (0x8A, '\u{0160}'),
        (0x8B, '\u{2039}'),
        (0x8C, '\u{0152}'),
        (0x8E, '\u{017D}'),
        (0x91, '\u{2018}'),
        (0x92, '\u{2019}'),
        (0x93, '\u{201C}'),
        (0x94, '\u{201D}'),
        (0x95, '\u{2022}'),
        (0x96, '\u{2013}'),
        (0x97, '\u{2014}'),
        (0x98, '\u{02DC}'),
        (0x99, '\u{2122}'),
        (0x9A, '\u{0161}'),
        (0x9B, '\u{203A}'),
        (0x9C, '\u{0153}'),
        (0x9E, '\u{017E}'),
        (0x9F, '\u{0178}'),
    ];
    SPECIALS
        .iter()
        .find(|(raw, _)| *raw == byte)
        .map(|(_, decoded)| *decoded)
        .unwrap_or(byte as char)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Builds a synthetic .car header with the three known ASCII fields and
    // checks they round-trip through the parser.
    #[test]
    fn parses_car_header_fields() {
        let mut bytes = vec![0u8; 0x90];
        bytes[0..4].copy_from_slice(&0x03eau32.to_le_bytes());
        bytes[4..8].copy_from_slice(&2u32.to_le_bytes());
        bytes[0x0c..0x0c + 6].copy_from_slice(b"2-D-2\0");
        bytes[0x48..0x48 + 5].copy_from_slice(b"2D2L\0");
        bytes[0x84..0x84 + 5].copy_from_slice(b"2D2L\0");

        let report = inspect_car_bytes(&bytes).expect("car should parse");
        assert_eq!(report.header_magic, Some(0x03ea));
        assert_eq!(report.primary_display_name.as_deref(), Some("2-D-2"));
        assert_eq!(report.internal_stem.as_deref(), Some("2D2L"));
    }

    // Checks the .lco stem at 0x04 and the first early lane at 0x20.
    #[test]
    fn parses_lco_header_and_lanes() {
        let mut bytes = vec![0u8; 0x58];
        bytes[0..4].copy_from_slice(&0x07d5u32.to_le_bytes());
        bytes[4..4 + 5].copy_from_slice(b"2D2L\0");
        bytes[0x20..0x24].copy_from_slice(&100u32.to_le_bytes());

        let report = inspect_lco_bytes(&bytes).expect("lco should parse");
        assert_eq!(report.header_magic, Some(0x07d5));
        assert_eq!(report.internal_stem.as_deref(), Some("2D2L"));
        assert_eq!(report.early_lanes[0].raw_u32, 100);
    }

    // .cgo stem starts at offset 4; .cct takes its identifier/value from the
    // first non-blank line.
    #[test]
    fn parses_cgo_and_cct_files() {
        let cgo = inspect_cgo_bytes(b"\x00\x00\\BAuto_Carrier\0")
            .expect("cgo should parse");
        assert_eq!(cgo.content_stem.as_deref(), Some("Auto_Carrier"));

        let cct = inspect_cct_bytes(b"Auto_Carrier 13\n").expect("cct should parse");
        assert_eq!(cct.identifier.as_deref(), Some("Auto_Carrier"));
        assert_eq!(cct.value, Some(13));
    }

    // One matched .car/.lco pair whose display name is NOT in the
    // unmatched-name list should produce one census row flagged as matched.
    #[test]
    fn builds_locomotive_display_census() {
        let mut car_reports = BTreeMap::new();
        car_reports.insert(
            "2D2L.car".to_string(),
            EngineTypeCarInspectionReport {
                file_size: 0,
                header_magic: Some(0x03ea),
                header_magic_hex: Some("0x000003ea".to_string()),
                record_kind: Some(2),
                record_kind_hex: Some("0x00000002".to_string()),
                primary_display_name: Some("2-D-2".to_string()),
                content_name: Some("2D2L".to_string()),
                internal_stem: Some("2D2L".to_string()),
                notes: Vec::new(),
            },
        );
        let families = vec![EngineTypeFamilyEntry {
            canonical_stem: "2d2l".to_string(),
            car_file: Some("2D2L.car".to_string()),
            lco_file: Some("2D2L.lco".to_string()),
            cgo_file: None,
            cct_file: None,
            primary_display_name: Some("2-D-2".to_string()),
            content_name: Some("2D2L".to_string()),
            internal_stem: Some("2D2L".to_string()),
            cct_identifier: None,
            cct_value: None,
            has_matched_locomotive_pair: true,
        }];

        let report =
            build_locomotive_display_census(Path::new("EngineTypes"), &families, &car_reports)
                .expect("census should build");
        assert_eq!(report.observed_locomotive_pair_count, 1);
        assert_eq!(report.entries[0].primary_display_name, "2-D-2");
        assert!(report.entries[0].matches_grounded_prefix_name);
    }
}
|
||||
148
crates/rrt-runtime/src/inspect/imb.rs
Normal file
148
crates/rrt-runtime/src/inspect/imb.rs
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// One parsed line of a .imb file: a leading key plus its value tokens.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ImbInspectionEntry {
    /// 1-based line number within the decoded text.
    pub line_number: usize,
    pub key: String,
    /// Value tokens re-joined with single spaces.
    pub raw_value: String,
    pub tokens: Vec<String>,
    /// Populated only when every token parses as an i64.
    pub integer_values: Option<Vec<i64>>,
    /// Populated only when every token parses as an f64.
    pub float_values: Option<Vec<f64>>,
}
|
||||
|
||||
/// Whole-file report for a .imb key/value text file.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ImbInspectionReport {
    pub line_count: usize,
    pub entry_count: usize,
    pub blank_line_count: usize,
    /// Lines with a key but no value tokens.
    pub malformed_line_count: usize,
    pub notes: Vec<String>,
    pub entries: Vec<ImbInspectionEntry>,
    pub malformed_lines: Vec<String>,
}
|
||||
|
||||
pub fn inspect_imb_file(path: &Path) -> Result<ImbInspectionReport, Box<dyn std::error::Error>> {
|
||||
let bytes = fs::read(path)?;
|
||||
inspect_imb_bytes(&bytes)
|
||||
}
|
||||
|
||||
pub fn inspect_imb_bytes(bytes: &[u8]) -> Result<ImbInspectionReport, Box<dyn std::error::Error>> {
|
||||
let text = decode_windows_1252(bytes);
|
||||
let mut entries = Vec::new();
|
||||
let mut malformed_lines = Vec::new();
|
||||
let mut blank_line_count = 0usize;
|
||||
|
||||
for (index, raw_line) in text.lines().enumerate() {
|
||||
let line_number = index + 1;
|
||||
let trimmed = raw_line.trim();
|
||||
if trimmed.is_empty() {
|
||||
blank_line_count += 1;
|
||||
continue;
|
||||
}
|
||||
let mut parts = trimmed.split_whitespace();
|
||||
let Some(key) = parts.next() else {
|
||||
blank_line_count += 1;
|
||||
continue;
|
||||
};
|
||||
let tokens = parts.map(|token| token.to_string()).collect::<Vec<_>>();
|
||||
if tokens.is_empty() {
|
||||
malformed_lines.push(raw_line.to_string());
|
||||
continue;
|
||||
}
|
||||
let integer_values = parse_i64_tokens(&tokens);
|
||||
let float_values = parse_f64_tokens(&tokens);
|
||||
entries.push(ImbInspectionEntry {
|
||||
line_number,
|
||||
key: key.to_string(),
|
||||
raw_value: tokens.join(" "),
|
||||
tokens,
|
||||
integer_values,
|
||||
float_values,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(ImbInspectionReport {
|
||||
line_count: text.lines().count(),
|
||||
entry_count: entries.len(),
|
||||
blank_line_count,
|
||||
malformed_line_count: malformed_lines.len(),
|
||||
notes: vec![
|
||||
"The current .imb parser preserves one whitespace-delimited key plus the remaining token list per line.".to_string(),
|
||||
"Integer and float projections are only populated when every token in the value lane parses cleanly.".to_string(),
|
||||
],
|
||||
entries,
|
||||
malformed_lines,
|
||||
})
|
||||
}
|
||||
|
||||
/// Parses every token as an i64; returns `None` unless ALL tokens parse.
fn parse_i64_tokens(tokens: &[String]) -> Option<Vec<i64>> {
    let mut values = Vec::with_capacity(tokens.len());
    for token in tokens {
        values.push(token.parse::<i64>().ok()?);
    }
    Some(values)
}
|
||||
|
||||
/// Parses every token as an f64; returns `None` unless ALL tokens parse.
fn parse_f64_tokens(tokens: &[String]) -> Option<Vec<f64>> {
    let mut values = Vec::with_capacity(tokens.len());
    for token in tokens {
        values.push(token.parse::<f64>().ok()?);
    }
    Some(values)
}
|
||||
|
||||
fn decode_windows_1252(bytes: &[u8]) -> String {
|
||||
bytes.iter().map(|byte| decode_windows_1252_byte(*byte)).collect()
|
||||
}
|
||||
|
||||
/// Maps one Windows-1252 byte to its Unicode scalar value.
///
/// Bytes in the 0x80..=0x9F "specials" range use the CP1252 mapping table;
/// every other byte maps directly (the Latin-1 overlap). Undefined CP1252
/// code points (0x81, 0x8D, 0x8F, 0x90, 0x9D) fall through to the direct
/// mapping, matching the original behavior.
///
/// NOTE(review): this decoder is duplicated in the engine_types inspector —
/// consider hoisting it into a shared module.
fn decode_windows_1252_byte(byte: u8) -> char {
    const SPECIALS: [(u8, char); 27] = [
        (0x80, '\u{20AC}'),
        (0x82, '\u{201A}'),
        (0x83, '\u{0192}'),
        (0x84, '\u{201E}'),
        (0x85, '\u{2026}'),
        (0x86, '\u{2020}'),
        (0x87, '\u{2021}'),
        (0x88, '\u{02C6}'),
        (0x89, '\u{2030}'),
        (0x8A, '\u{0160}'),
        (0x8B, '\u{2039}'),
        (0x8C, '\u{0152}'),
        (0x8E, '\u{017D}'),
        (0x91, '\u{2018}'),
        (0x92, '\u{2019}'),
        (0x93, '\u{201C}'),
        (0x94, '\u{201D}'),
        (0x95, '\u{2022}'),
        (0x96, '\u{2013}'),
        (0x97, '\u{2014}'),
        (0x98, '\u{02DC}'),
        (0x99, '\u{2122}'),
        (0x9A, '\u{0161}'),
        (0x9B, '\u{203A}'),
        (0x9C, '\u{0153}'),
        (0x9E, '\u{017E}'),
        (0x9F, '\u{0178}'),
    ];
    SPECIALS
        .iter()
        .find(|(raw, _)| *raw == byte)
        .map(|(_, decoded)| *decoded)
        .unwrap_or(byte as char)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // A string-valued line, a single integer line, and a 4-tuple line should
    // all parse; integer projections fill only for the all-numeric lines.
    #[test]
    fn parses_scalar_and_tuple_lines() {
        let report = inspect_imb_bytes(
            b"TGAName ICE_Profile\nTGAWidth 256\nImageWH 0 0 138 32\n",
        )
        .expect("imb should parse");

        assert_eq!(report.entry_count, 3);
        assert_eq!(report.entries[0].key, "TGAName");
        assert_eq!(report.entries[1].integer_values, Some(vec![256]));
        assert_eq!(report.entries[2].integer_values, Some(vec![0, 0, 138, 32]));
    }
}
|
||||
270
crates/rrt-runtime/src/inspect/lng.rs
Normal file
270
crates/rrt-runtime/src/inspect/lng.rs
Normal file
|
|
@ -0,0 +1,270 @@
|
|||
use std::collections::{BTreeMap, BTreeSet};
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// One parsed row of a .lng language file: either a quoted string-id row
/// (`kind == "string"`) or a styled credits row (`kind == "styled"`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct LngInspectionEntry {
    /// 1-based line number within the decoded text.
    pub line_number: usize,
    pub kind: String,
    /// Present for string rows only.
    pub string_id: Option<u32>,
    /// Present for styled rows only.
    pub style_level: Option<u32>,
    /// Text with the original escape spelling preserved.
    pub raw_text: String,
    /// Text with escapes resolved (e.g. `\n` becomes a real line break).
    pub normalized_text: String,
}
|
||||
|
||||
/// A .lng line that matched neither recognized row shape, kept verbatim with
/// a human-readable reason.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct LngMalformedLine {
    pub line_number: usize,
    pub raw_line: String,
    pub reason: String,
}
|
||||
|
||||
/// Whole-file report for a .lng language file.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct LngInspectionReport {
    /// Classification derived from which row kinds were observed.
    pub format_family: String,
    pub line_count: usize,
    pub entry_count: usize,
    pub string_entry_count: usize,
    pub styled_entry_count: usize,
    /// Lines starting with ';'.
    pub comment_count: usize,
    pub blank_line_count: usize,
    pub duplicate_id_count: usize,
    /// String ids seen on more than one row.
    pub duplicate_ids: Vec<u32>,
    pub malformed_line_count: usize,
    pub highest_string_id: Option<u32>,
    pub notes: Vec<String>,
    pub entries: Vec<LngInspectionEntry>,
    pub malformed_lines: Vec<LngMalformedLine>,
}
|
||||
|
||||
pub fn inspect_lng_file(path: &Path) -> Result<LngInspectionReport, Box<dyn std::error::Error>> {
|
||||
let bytes = fs::read(path)?;
|
||||
inspect_lng_bytes(&bytes)
|
||||
}
|
||||
|
||||
/// Parses .lng language text: quoted string-id rows, styled credits rows,
/// ';' comments, and blank lines; anything else is reported as malformed.
///
/// # Errors
/// Currently infallible; the `Result` mirrors the other inspect entry points.
pub fn inspect_lng_bytes(bytes: &[u8]) -> Result<LngInspectionReport, Box<dyn std::error::Error>> {
    let text = decode_windows_1252(bytes);
    let mut entries = Vec::new();
    let mut malformed_lines = Vec::new();
    // Occurrences per string id, used to report duplicates afterwards.
    let mut string_id_counts = BTreeMap::<u32, usize>::new();
    let mut comment_count = 0usize;
    let mut blank_line_count = 0usize;
    let mut string_entry_count = 0usize;
    let mut styled_entry_count = 0usize;

    for (index, raw_line) in text.lines().enumerate() {
        let line_number = index + 1;
        let trimmed = raw_line.trim();
        if trimmed.is_empty() {
            blank_line_count += 1;
            continue;
        }
        if trimmed.starts_with(';') {
            comment_count += 1;
            continue;
        }

        // Try the quoted string-id shape first, then the styled credits shape.
        if let Some(entry) = parse_string_entry(line_number, raw_line) {
            string_entry_count += 1;
            if let Some(string_id) = entry.string_id {
                *string_id_counts.entry(string_id).or_default() += 1;
            }
            entries.push(entry);
            continue;
        }
        if let Some(entry) = parse_styled_entry(line_number, raw_line) {
            styled_entry_count += 1;
            entries.push(entry);
            continue;
        }

        malformed_lines.push(LngMalformedLine {
            line_number,
            raw_line: raw_line.to_string(),
            reason: "line is neither a quoted string-id row nor a styled credits row".to_string(),
        });
    }

    let duplicate_ids = string_id_counts
        .into_iter()
        .filter_map(|(string_id, count)| (count > 1).then_some(string_id))
        .collect::<Vec<_>>();
    let highest_string_id = entries.iter().filter_map(|entry| entry.string_id).max();

    // Classify the file from the set of observed row kinds.
    let format_kinds = entries
        .iter()
        .map(|entry| entry.kind.as_str())
        .collect::<BTreeSet<_>>();
    let format_family = match (format_kinds.contains("string"), format_kinds.contains("styled")) {
        (true, false) => "quoted-string-table".to_string(),
        (false, true) => "styled-credits-lines".to_string(),
        (true, true) => "mixed-language-table".to_string(),
        (false, false) => "unclassified-language-text".to_string(),
    };

    // Notes are conditional: only mention styled rows / duplicates when seen.
    let mut notes = Vec::new();
    notes.push(
        "Quoted string rows preserve both the raw escape spelling and a normalized text view where `\\n` becomes a line break.".to_string(),
    );
    if format_kinds.contains("styled") {
        notes.push(
            "Styled rows use the observed `*<level>` credits format and preserve the style level separately from the rendered text.".to_string(),
        );
    }
    if !duplicate_ids.is_empty() {
        notes.push("Duplicate string ids are preserved explicitly instead of silently overwriting earlier rows.".to_string());
    }

    Ok(LngInspectionReport {
        format_family,
        line_count: text.lines().count(),
        entry_count: entries.len(),
        string_entry_count,
        styled_entry_count,
        comment_count,
        blank_line_count,
        duplicate_id_count: duplicate_ids.len(),
        duplicate_ids,
        malformed_line_count: malformed_lines.len(),
        highest_string_id,
        notes,
        entries,
        malformed_lines,
    })
}
|
||||
|
||||
fn parse_string_entry(line_number: usize, raw_line: &str) -> Option<LngInspectionEntry> {
|
||||
let trimmed = raw_line.trim_start();
|
||||
let digit_len = trimmed.chars().take_while(|ch| ch.is_ascii_digit()).count();
|
||||
if digit_len == 0 {
|
||||
return None;
|
||||
}
|
||||
let string_id = trimmed[..digit_len].parse().ok()?;
|
||||
let remainder = trimmed[digit_len..].trim_start();
|
||||
let raw_text = parse_quoted_payload(remainder)?;
|
||||
Some(LngInspectionEntry {
|
||||
line_number,
|
||||
kind: "string".to_string(),
|
||||
string_id: Some(string_id),
|
||||
style_level: None,
|
||||
normalized_text: normalize_lng_text(&raw_text),
|
||||
raw_text,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_styled_entry(line_number: usize, raw_line: &str) -> Option<LngInspectionEntry> {
|
||||
let trimmed = raw_line.trim_start();
|
||||
let remainder = trimmed.strip_prefix('*')?;
|
||||
let digit_len = remainder
|
||||
.chars()
|
||||
.take_while(|ch| ch.is_ascii_digit())
|
||||
.count();
|
||||
if digit_len == 0 {
|
||||
return None;
|
||||
}
|
||||
let style_level = remainder[..digit_len].parse().ok()?;
|
||||
let raw_text = remainder[digit_len..].trim_start().to_string();
|
||||
Some(LngInspectionEntry {
|
||||
line_number,
|
||||
kind: "styled".to_string(),
|
||||
string_id: None,
|
||||
style_level: Some(style_level),
|
||||
normalized_text: normalize_lng_text(&raw_text),
|
||||
raw_text,
|
||||
})
|
||||
}
|
||||
|
||||
/// Strips one pair of surrounding double quotes from a trimmed line.
///
/// Returns `None` unless the trimmed text both starts and ends with a
/// distinct `"` character — a lone `"` is rejected because the suffix
/// strip runs on the already-prefix-stripped remainder.
fn parse_quoted_payload(text: &str) -> Option<String> {
    text.trim()
        .strip_prefix('"')
        .and_then(|inner| inner.strip_suffix('"'))
        .map(str::to_string)
}
|
||||
|
||||
/// Converts every literal two-character escape `\n` into a real line break.
fn normalize_lng_text(text: &str) -> String {
    text.split("\\n").collect::<Vec<_>>().join("\n")
}
|
||||
|
||||
fn decode_windows_1252(bytes: &[u8]) -> String {
|
||||
bytes.iter().map(|byte| decode_windows_1252_byte(*byte)).collect()
|
||||
}
|
||||
|
||||
/// Maps a single Windows-1252 byte to its Unicode character.
///
/// Bytes outside `0x80..=0x9F` coincide with the matching Unicode code
/// point (Latin-1 pass-through). The `0x80..=0x9F` range uses the
/// Windows-1252 replacement table; bytes with no Windows-1252 assignment
/// (0x81, 0x8D, 0x8F, 0x90, 0x9D) fall back to the corresponding C1
/// control character, matching the original match's default arm.
fn decode_windows_1252_byte(byte: u8) -> char {
    // Unicode mappings for 0x80..=0x9F, indexed by `byte - 0x80`.
    const HIGH_TABLE: [char; 32] = [
        '\u{20AC}', '\u{0081}', '\u{201A}', '\u{0192}', '\u{201E}', '\u{2026}', '\u{2020}',
        '\u{2021}', '\u{02C6}', '\u{2030}', '\u{0160}', '\u{2039}', '\u{0152}', '\u{008D}',
        '\u{017D}', '\u{008F}', '\u{0090}', '\u{2018}', '\u{2019}', '\u{201C}', '\u{201D}',
        '\u{2022}', '\u{2013}', '\u{2014}', '\u{02DC}', '\u{2122}', '\u{0161}', '\u{203A}',
        '\u{0153}', '\u{009D}', '\u{017E}', '\u{0178}',
    ];
    if (0x80..=0x9F).contains(&byte) {
        HIGH_TABLE[usize::from(byte - 0x80)]
    } else {
        byte as char
    }
}
|
||||
|
||||
// Unit tests pinning the exact report fields produced by `inspect_lng_bytes`
// for each observed `.lng` row shape.
#[cfg(test)]
mod tests {
    use super::*;

    // Standard quoted string-table input: a `;` comment, leading
    // whitespace before an id, a tab separator, and the literal `\n` escape.
    #[test]
    fn parses_standard_string_rows_and_comments() {
        let report = inspect_lng_bytes(b"; comment\n 10 \"Cancel\"\n11\t\"Line\\nBreak\"\n")
            .expect("lng should parse");

        assert_eq!(report.format_family, "quoted-string-table");
        assert_eq!(report.comment_count, 1);
        assert_eq!(report.string_entry_count, 2);
        assert_eq!(report.highest_string_id, Some(11));
        // The escape spelling `\n` becomes a real line break in the normalized view.
        assert_eq!(report.entries[1].normalized_text, "Line\nBreak");
    }

    // Credits-style rows use a `*<level>` prefix; the bare "PopTop" line
    // matches neither row shape and must be reported as malformed.
    #[test]
    fn parses_styled_credit_rows() {
        let report = inspect_lng_bytes(b"*3Railroad Tycoon 3\n*2Development\nPopTop\n")
            .expect("lng should parse");

        assert_eq!(report.format_family, "styled-credits-lines");
        assert_eq!(report.styled_entry_count, 2);
        assert_eq!(report.malformed_line_count, 1);
        assert_eq!(report.entries[0].style_level, Some(3));
        assert_eq!(report.entries[0].raw_text, "Railroad Tycoon 3");
    }

    // Both rows with the same id are kept as entries; the shared id is
    // listed exactly once in `duplicate_ids`.
    #[test]
    fn reports_duplicate_string_ids() {
        let report = inspect_lng_bytes(b"1 \"A\"\n1 \"B\"\n").expect("lng should parse");

        assert_eq!(report.duplicate_id_count, 1);
        assert_eq!(report.duplicate_ids, vec![1]);
    }

    // Byte 0x85 is the Windows-1252 horizontal ellipsis, not a raw C1
    // control character.
    #[test]
    fn decodes_windows_1252_text() {
        let report = inspect_lng_bytes(b"1 \"Wait\x85\"\n").expect("lng should parse");

        assert_eq!(report.entries[0].raw_text, "Wait…");
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue