Add parsers for RT3 language and engine type assets

This commit is contained in:
Jan Petykiewicz 2026-04-21 22:10:04 -07:00
commit 61472bf72d
17 changed files with 32835 additions and 9 deletions

View file

@ -1,6 +1,9 @@
// One parser module per shipped asset-file family handled by this crate.
pub mod building;
pub mod campaign;
pub mod cargo;
pub mod engine_types;
pub mod imb;
pub mod lng;
pub mod pk4;
pub mod smp;
pub mod win;

View file

@ -0,0 +1,592 @@
use std::collections::BTreeMap;
use std::fs;
use std::path::Path;
use serde::{Deserialize, Serialize};
// Fixed byte offsets of the NUL-terminated ASCII name fields inside a .car header.
const CAR_PRIMARY_DISPLAY_NAME_OFFSET: usize = 0x0c;
const CAR_CONTENT_NAME_OFFSET: usize = 0x48;
const CAR_INTERNAL_STEM_OFFSET: usize = 0x84;
// Offset of the internal stem string inside a .lco header.
const LCO_INTERNAL_STEM_OFFSET: usize = 0x04;
// Shipped locomotive display names known not to appear in the grounded
// descriptor-prefix name list; the display census flags these rows explicitly.
const UNMATCHED_LOCOMOTIVE_DISPLAY_NAMES: [&str; 5] =
    ["242 A1", "Class 460", "Class A1", "Class P8", "Class QJ"];
// Dword offsets of the early raw numeric lanes surfaced from a .lco file.
const LCO_EARLY_LANE_OFFSETS: [usize; 14] = [
    0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c, 0x40, 0x44, 0x48, 0x4c, 0x50, 0x54,
];
/// Header-level summary parsed from one binary `.car` engine-type file.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EngineTypeCarInspectionReport {
    pub file_size: usize,
    /// Leading dword at offset 0x00, when the file is long enough to hold it.
    pub header_magic: Option<u32>,
    pub header_magic_hex: Option<String>,
    /// Second dword at offset 0x04.
    pub record_kind: Option<u32>,
    pub record_kind_hex: Option<String>,
    /// ASCII name read at `CAR_PRIMARY_DISPLAY_NAME_OFFSET` (0x0c).
    pub primary_display_name: Option<String>,
    /// ASCII name read at `CAR_CONTENT_NAME_OFFSET` (0x48).
    pub content_name: Option<String>,
    /// ASCII stem read at `CAR_INTERNAL_STEM_OFFSET` (0x84).
    pub internal_stem: Option<String>,
    pub notes: Vec<String>,
}

/// One raw 32-bit lane read from a `.lco` file, exposed both as a little-endian
/// integer and as the same bits reinterpreted as `f32`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct EngineTypeRawLane {
    pub offset: usize,
    pub offset_hex: String,
    pub raw_u32: u32,
    pub raw_u32_hex: String,
    /// Bit reinterpretation of `raw_u32`; gameplay semantics not asserted yet.
    pub raw_f32: f32,
}

/// Header-level summary parsed from one binary `.lco` engine-type file.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct EngineTypeLcoInspectionReport {
    pub file_size: usize,
    pub header_magic: Option<u32>,
    pub header_magic_hex: Option<String>,
    /// ASCII stem read at `LCO_INTERNAL_STEM_OFFSET` (0x04).
    pub internal_stem: Option<String>,
    /// Raw lanes read at each `LCO_EARLY_LANE_OFFSETS` entry that fits in the file.
    pub early_lanes: Vec<EngineTypeRawLane>,
    pub notes: Vec<String>,
}

/// Conservative summary parsed from one binary `.cgo` engine-type file.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct EngineTypeCgoInspectionReport {
    pub file_size: usize,
    /// Leading dword at offset 0x00, plus its hex and f32-bit views.
    pub leading_u32: Option<u32>,
    pub leading_u32_hex: Option<String>,
    pub leading_f32: Option<f32>,
    /// ASCII stem read immediately after the leading dword (offset 0x04).
    pub content_stem: Option<String>,
    pub notes: Vec<String>,
}

/// Summary parsed from one text `.cct` engine-type file.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EngineTypeCctInspectionReport {
    pub file_size: usize,
    pub line_count: usize,
    /// Identifier/value pair split from the first non-blank line, when present.
    pub identifier: Option<String>,
    pub value: Option<i64>,
    /// Every decoded text line, preserved verbatim.
    pub raw_lines: Vec<String>,
    pub notes: Vec<String>,
}

/// One matched .car/.lco locomotive pair in the display-name census.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EngineTypeLocomotiveDisplayEntry {
    pub car_file: String,
    pub lco_file: String,
    pub primary_display_name: String,
    pub content_name: String,
    pub internal_stem: String,
    /// False only for names listed in `UNMATCHED_LOCOMOTIVE_DISPLAY_NAMES`.
    pub matches_grounded_prefix_name: bool,
}

/// A census row whose display name is NOT in the grounded prefix name list.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EngineTypeLocomotiveDisplayFamily {
    pub car_file: String,
    pub lco_file: String,
    pub primary_display_name: String,
    pub content_name: String,
    pub internal_stem: String,
}

/// Aggregate census over all matched .car/.lco locomotive pairs in one directory.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EngineTypeLocomotiveDisplayCensusReport {
    pub format_version: u32,
    pub semantic_family: String,
    pub source_root: String,
    /// Human-readable map of the fixed .car header offsets used by the parser.
    pub car_header_layout: BTreeMap<String, String>,
    pub observed_locomotive_pair_count: usize,
    pub grounded_prefix_count: usize,
    pub grounded_prefix_match_count: usize,
    pub unmatched_display_family_count: usize,
    pub unmatched_display_families: Vec<EngineTypeLocomotiveDisplayFamily>,
    pub entries: Vec<EngineTypeLocomotiveDisplayEntry>,
    pub notes: Vec<String>,
}

/// One engine-type family: all files sharing a case-insensitive stem.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EngineTypeFamilyEntry {
    /// Lowercased file stem shared by the family's files.
    pub canonical_stem: String,
    pub car_file: Option<String>,
    pub lco_file: Option<String>,
    pub cgo_file: Option<String>,
    pub cct_file: Option<String>,
    /// Name fields copied from the family's .car report, when one exists.
    pub primary_display_name: Option<String>,
    pub content_name: Option<String>,
    pub internal_stem: Option<String>,
    /// Identifier/value row copied from the family's .cct report, when one exists.
    pub cct_identifier: Option<String>,
    pub cct_value: Option<i64>,
    /// True when the family has both a .car and a .lco file.
    pub has_matched_locomotive_pair: bool,
}

/// Top-level report over one EngineTypes directory scan.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EngineTypesInspectionReport {
    pub source_root: String,
    pub family_count: usize,
    pub car_file_count: usize,
    pub lco_file_count: usize,
    pub cgo_file_count: usize,
    pub cct_file_count: usize,
    pub matched_locomotive_pair_count: usize,
    /// Families with a .car but no .lco (and vice versa / standalone variants below).
    pub unmatched_car_file_count: usize,
    pub unmatched_lco_file_count: usize,
    pub unmatched_cgo_file_count: usize,
    pub unmatched_cct_file_count: usize,
    pub locomotive_display_census: EngineTypeLocomotiveDisplayCensusReport,
    pub families: Vec<EngineTypeFamilyEntry>,
}
/// Reads a `.car` file from disk and inspects its header fields.
pub fn inspect_car_file(
    path: &Path,
) -> Result<EngineTypeCarInspectionReport, Box<dyn std::error::Error>> {
    inspect_car_bytes(&fs::read(path)?)
}
/// Inspects an in-memory `.car` payload: leading dwords plus the three
/// fixed-offset ASCII name fields.
pub fn inspect_car_bytes(
    bytes: &[u8],
) -> Result<EngineTypeCarInspectionReport, Box<dyn std::error::Error>> {
    // Read each header dword once and derive the hex rendering from it.
    let header_magic = read_u32_le(bytes, 0);
    let record_kind = read_u32_le(bytes, 4);
    Ok(EngineTypeCarInspectionReport {
        file_size: bytes.len(),
        header_magic,
        header_magic_hex: header_magic.map(|v| format!("0x{v:08x}")),
        record_kind,
        record_kind_hex: record_kind.map(|v| format!("0x{v:08x}")),
        primary_display_name: read_ascii_field(bytes, CAR_PRIMARY_DISPLAY_NAME_OFFSET),
        content_name: read_ascii_field(bytes, CAR_CONTENT_NAME_OFFSET),
        internal_stem: read_ascii_field(bytes, CAR_INTERNAL_STEM_OFFSET),
        notes: vec![
            "The current .car parser exposes the fixed header fields already grounded by the checked locomotive display census.".to_string(),
        ],
    })
}
/// Reads a `.lco` file from disk and inspects its header and early lanes.
pub fn inspect_lco_file(
    path: &Path,
) -> Result<EngineTypeLcoInspectionReport, Box<dyn std::error::Error>> {
    inspect_lco_bytes(&fs::read(path)?)
}
/// Inspects an in-memory `.lco` payload: header magic, internal stem, and the
/// early raw numeric lanes at the fixed `LCO_EARLY_LANE_OFFSETS`.
pub fn inspect_lco_bytes(
    bytes: &[u8],
) -> Result<EngineTypeLcoInspectionReport, Box<dyn std::error::Error>> {
    // Collect the early lanes in offset order; offsets past EOF are skipped.
    let mut early_lanes = Vec::with_capacity(LCO_EARLY_LANE_OFFSETS.len());
    for &offset in LCO_EARLY_LANE_OFFSETS.iter() {
        if let Some(raw_u32) = read_u32_le(bytes, offset) {
            early_lanes.push(EngineTypeRawLane {
                offset,
                offset_hex: format!("0x{offset:04x}"),
                raw_u32,
                raw_u32_hex: format!("0x{raw_u32:08x}"),
                raw_f32: f32::from_bits(raw_u32),
            });
        }
    }
    let header_magic = read_u32_le(bytes, 0);
    Ok(EngineTypeLcoInspectionReport {
        file_size: bytes.len(),
        header_magic,
        header_magic_hex: header_magic.map(|value| format!("0x{value:08x}")),
        internal_stem: read_ascii_field(bytes, LCO_INTERNAL_STEM_OFFSET),
        early_lanes,
        notes: vec![
            "The current .lco parser exposes the fixed stem at 0x04 plus the early raw lane block without asserting gameplay semantics for those numeric fields.".to_string(),
        ],
    })
}
/// Reads a `.cgo` file from disk and inspects its leading lane and stem.
pub fn inspect_cgo_file(
    path: &Path,
) -> Result<EngineTypeCgoInspectionReport, Box<dyn std::error::Error>> {
    inspect_cgo_bytes(&fs::read(path)?)
}
/// Inspects an in-memory `.cgo` payload: the leading 32-bit lane (in integer,
/// hex, and f32-bit views) plus the inline content stem at offset 0x04.
pub fn inspect_cgo_bytes(
    bytes: &[u8],
) -> Result<EngineTypeCgoInspectionReport, Box<dyn std::error::Error>> {
    let leading_u32 = read_u32_le(bytes, 0);
    // Derive both alternate views of the leading dword in one place.
    let (leading_u32_hex, leading_f32) = match leading_u32 {
        Some(value) => (Some(format!("0x{value:08x}")), Some(f32::from_bits(value))),
        None => (None, None),
    };
    Ok(EngineTypeCgoInspectionReport {
        file_size: bytes.len(),
        leading_u32,
        leading_u32_hex,
        leading_f32,
        content_stem: read_ascii_field(bytes, 4),
        notes: vec![
            "The current .cgo parser is intentionally conservative: it exposes the leading scalar lane plus the inline content stem without overclaiming the remaining payload layout.".to_string(),
        ],
    })
}
/// Reads a `.cct` file from disk and inspects its text rows.
pub fn inspect_cct_file(
    path: &Path,
) -> Result<EngineTypeCctInspectionReport, Box<dyn std::error::Error>> {
    inspect_cct_bytes(&fs::read(path)?)
}
pub fn inspect_cct_bytes(
bytes: &[u8],
) -> Result<EngineTypeCctInspectionReport, Box<dyn std::error::Error>> {
let text = decode_windows_1252(bytes);
let raw_lines = text.lines().map(|line| line.to_string()).collect::<Vec<_>>();
let first_nonblank = raw_lines.iter().find(|line| !line.trim().is_empty()).cloned();
let (identifier, value) = first_nonblank
.as_deref()
.map(parse_cct_row)
.unwrap_or((None, None));
Ok(EngineTypeCctInspectionReport {
file_size: bytes.len(),
line_count: raw_lines.len(),
identifier,
value,
raw_lines,
notes: vec![
"The current .cct parser preserves the first observed identifier/value row and the raw text lines without claiming wider semantics yet.".to_string(),
],
})
}
/// Walks one EngineTypes directory, parses every recognized asset file, groups
/// files into stem-keyed families, and aggregates the counts and the
/// locomotive display census into one report.
pub fn inspect_engine_types_dir(
    path: &Path,
) -> Result<EngineTypesInspectionReport, Box<dyn std::error::Error>> {
    // Families are keyed by the lowercased file stem so .car/.lco/.cgo/.cct
    // siblings with the same stem collapse into one family.
    let mut families = BTreeMap::<String, EngineTypeFamilyBuilder>::new();
    // Per-extension reports keyed by file name. All four kinds are parsed
    // eagerly so malformed files surface as errors here, even though only the
    // .car and .cct reports feed the summary rows below.
    let mut car_reports = BTreeMap::<String, EngineTypeCarInspectionReport>::new();
    let mut lco_reports = BTreeMap::<String, EngineTypeLcoInspectionReport>::new();
    let mut cgo_reports = BTreeMap::<String, EngineTypeCgoInspectionReport>::new();
    let mut cct_reports = BTreeMap::<String, EngineTypeCctInspectionReport>::new();
    for entry in fs::read_dir(path)? {
        let entry = entry?;
        // Only plain files participate; subdirectories are skipped.
        if !entry.file_type()?.is_file() {
            continue;
        }
        let file_name = entry.file_name().to_string_lossy().into_owned();
        // Files without a UTF-8 stem or extension are ignored entirely.
        let Some(stem) = Path::new(&file_name)
            .file_stem()
            .and_then(|stem| stem.to_str())
            .map(|stem| stem.to_string())
        else {
            continue;
        };
        let Some(extension) = Path::new(&file_name)
            .extension()
            .and_then(|ext| ext.to_str())
            .map(|ext| ext.to_ascii_lowercase())
        else {
            continue;
        };
        let family = families.entry(stem.to_ascii_lowercase()).or_default();
        family.canonical_stem = stem.to_ascii_lowercase();
        // Route the file to its per-extension parser and record which family
        // slot it fills. Unknown extensions are silently ignored.
        match extension.as_str() {
            "car" => {
                family.car_file = Some(file_name.clone());
                car_reports.insert(file_name.clone(), inspect_car_file(&entry.path())?);
            }
            "lco" => {
                family.lco_file = Some(file_name.clone());
                lco_reports.insert(file_name.clone(), inspect_lco_file(&entry.path())?);
            }
            "cgo" => {
                family.cgo_file = Some(file_name.clone());
                cgo_reports.insert(file_name.clone(), inspect_cgo_file(&entry.path())?);
            }
            "cct" => {
                family.cct_file = Some(file_name.clone());
                cct_reports.insert(file_name.clone(), inspect_cct_file(&entry.path())?);
            }
            _ => {}
        }
    }
    // Flatten the builders into public family entries (BTreeMap iteration
    // keeps them sorted by canonical stem).
    let family_entries = families
        .values()
        .map(|family| build_family_entry(family, &car_reports, &cct_reports))
        .collect::<Vec<_>>();
    let matched_locomotive_pair_count = family_entries
        .iter()
        .filter(|family| family.has_matched_locomotive_pair)
        .count();
    let locomotive_display_census =
        build_locomotive_display_census(path, &family_entries, &car_reports)?;
    Ok(EngineTypesInspectionReport {
        source_root: path.display().to_string(),
        family_count: family_entries.len(),
        car_file_count: family_entries.iter().filter(|entry| entry.car_file.is_some()).count(),
        lco_file_count: family_entries.iter().filter(|entry| entry.lco_file.is_some()).count(),
        cgo_file_count: family_entries.iter().filter(|entry| entry.cgo_file.is_some()).count(),
        cct_file_count: family_entries.iter().filter(|entry| entry.cct_file.is_some()).count(),
        matched_locomotive_pair_count,
        // "Unmatched" means one half of the .car/.lco pair is missing; for
        // .cgo/.cct it means the family has neither a .car nor a .lco file.
        unmatched_car_file_count: family_entries
            .iter()
            .filter(|entry| entry.car_file.is_some() && entry.lco_file.is_none())
            .count(),
        unmatched_lco_file_count: family_entries
            .iter()
            .filter(|entry| entry.car_file.is_none() && entry.lco_file.is_some())
            .count(),
        unmatched_cgo_file_count: family_entries
            .iter()
            .filter(|entry| entry.cgo_file.is_some() && !(entry.car_file.is_some() || entry.lco_file.is_some()))
            .count(),
        unmatched_cct_file_count: family_entries
            .iter()
            .filter(|entry| entry.cct_file.is_some() && !(entry.car_file.is_some() || entry.lco_file.is_some()))
            .count(),
        locomotive_display_census,
        families: family_entries,
    })
}
/// Mutable accumulator for one stem-keyed family while scanning a directory;
/// flattened into `EngineTypeFamilyEntry` by `build_family_entry`.
#[derive(Default)]
struct EngineTypeFamilyBuilder {
    // Lowercased stem shared by the family's files.
    canonical_stem: String,
    car_file: Option<String>,
    lco_file: Option<String>,
    cgo_file: Option<String>,
    cct_file: Option<String>,
}
/// Flattens one family builder into its public entry, pulling the name fields
/// from the family's .car report and the identifier/value row from its .cct
/// report when those files exist.
fn build_family_entry(
    family: &EngineTypeFamilyBuilder,
    car_reports: &BTreeMap<String, EngineTypeCarInspectionReport>,
    cct_reports: &BTreeMap<String, EngineTypeCctInspectionReport>,
) -> EngineTypeFamilyEntry {
    // Resolve the per-extension reports for this family, when present.
    let car_report = match &family.car_file {
        Some(name) => car_reports.get(name.as_str()),
        None => None,
    };
    let cct_report = match &family.cct_file {
        Some(name) => cct_reports.get(name.as_str()),
        None => None,
    };
    EngineTypeFamilyEntry {
        canonical_stem: family.canonical_stem.clone(),
        car_file: family.car_file.clone(),
        lco_file: family.lco_file.clone(),
        cgo_file: family.cgo_file.clone(),
        cct_file: family.cct_file.clone(),
        primary_display_name: car_report.and_then(|r| r.primary_display_name.clone()),
        content_name: car_report.and_then(|r| r.content_name.clone()),
        internal_stem: car_report.and_then(|r| r.internal_stem.clone()),
        cct_identifier: cct_report.and_then(|r| r.identifier.clone()),
        cct_value: cct_report.and_then(|r| r.value),
        // A "matched locomotive pair" means both the .car and .lco halves exist.
        has_matched_locomotive_pair: family.car_file.is_some() && family.lco_file.is_some(),
    }
}
/// Builds the locomotive display-name census over every matched .car/.lco pair.
///
/// Rows are sorted by .car file name. A row "matches the grounded prefix"
/// unless its primary display name appears in
/// `UNMATCHED_LOCOMOTIVE_DISPLAY_NAMES`.
fn build_locomotive_display_census(
    path: &Path,
    families: &[EngineTypeFamilyEntry],
    car_reports: &BTreeMap<String, EngineTypeCarInspectionReport>,
) -> Result<EngineTypeLocomotiveDisplayCensusReport, Box<dyn std::error::Error>> {
    let mut entries = families
        .iter()
        .filter_map(|family| {
            // Only families with both a .car and a .lco file qualify as pairs.
            let car_file = family.car_file.clone()?;
            let lco_file = family.lco_file.clone()?;
            let car_report = car_reports.get(&car_file)?;
            // Clone the display name once and reuse it for both the lookup and
            // the row itself (previously cloned twice; `car_file` was also
            // redundantly cloned instead of moved).
            let display_name = car_report.primary_display_name.clone().unwrap_or_default();
            Some(EngineTypeLocomotiveDisplayEntry {
                matches_grounded_prefix_name: !UNMATCHED_LOCOMOTIVE_DISPLAY_NAMES
                    .contains(&display_name.as_str()),
                primary_display_name: display_name,
                content_name: car_report.content_name.clone().unwrap_or_default(),
                internal_stem: car_report.internal_stem.clone().unwrap_or_default(),
                car_file,
                lco_file,
            })
        })
        .collect::<Vec<_>>();
    entries.sort_by(|left, right| left.car_file.cmp(&right.car_file));
    // Rows whose display name is NOT in the grounded prefix become the
    // explicit unmatched-family list.
    let unmatched_display_families = entries
        .iter()
        .filter(|entry| !entry.matches_grounded_prefix_name)
        .map(|entry| EngineTypeLocomotiveDisplayFamily {
            car_file: entry.car_file.clone(),
            lco_file: entry.lco_file.clone(),
            primary_display_name: entry.primary_display_name.clone(),
            content_name: entry.content_name.clone(),
            internal_stem: entry.internal_stem.clone(),
        })
        .collect::<Vec<_>>();
    let grounded_prefix_count = entries
        .iter()
        .filter(|entry| entry.matches_grounded_prefix_name)
        .count();
    // Document the fixed .car header offsets the parser relies on.
    let mut car_header_layout = BTreeMap::new();
    car_header_layout.insert("format_version_dword_offset".to_string(), "0x00".to_string());
    car_header_layout.insert("record_kind_dword_offset".to_string(), "0x04".to_string());
    car_header_layout.insert(
        "primary_display_name_offset".to_string(),
        format!("0x{CAR_PRIMARY_DISPLAY_NAME_OFFSET:02x}"),
    );
    car_header_layout.insert(
        "content_name_offset".to_string(),
        format!("0x{CAR_CONTENT_NAME_OFFSET:02x}"),
    );
    car_header_layout.insert(
        "internal_stem_offset".to_string(),
        format!("0x{CAR_INTERNAL_STEM_OFFSET:02x}"),
    );
    Ok(EngineTypeLocomotiveDisplayCensusReport {
        format_version: 1,
        semantic_family: "engine-type-locomotive-display-census".to_string(),
        source_root: path.display().to_string(),
        car_header_layout,
        observed_locomotive_pair_count: entries.len(),
        grounded_prefix_count,
        grounded_prefix_match_count: grounded_prefix_count,
        unmatched_display_family_count: unmatched_display_families.len(),
        unmatched_display_families,
        entries,
        notes: vec![
            "Each row comes from one shipped .car/.lco locomotive engine-type pair under Data/EngineTypes.".to_string(),
            "The primary display string is parsed directly from the .car header at 0x0c rather than inferred from strings output.".to_string(),
            "The five unmatched display families are shipped named locomotive assets whose names do not appear in the current 61-name grounded descriptor prefix.".to_string(),
            "This export grounds the extra shipped locomotive-name cohort, but it does not by itself prove where those names land in the live ordinal catalog or descriptor bands.".to_string(),
        ],
    })
}
/// Reads a little-endian u32 at `offset`, or `None` if fewer than four bytes
/// remain there.
fn read_u32_le(bytes: &[u8], offset: usize) -> Option<u32> {
    match bytes.get(offset..offset + 4) {
        Some(window) => window.try_into().ok().map(u32::from_le_bytes),
        None => None,
    }
}
/// Reads a NUL-/padding-terminated ASCII string starting at `offset`.
///
/// The field ends at the first NUL byte or the first non-ASCII byte; the
/// latter also covers the 0xcd fill pattern the original code special-cased
/// (0xcd is not ASCII, so the extra `== 0xcd` test was redundant and has been
/// removed). Returns `None` when the offset is out of range or the field is
/// empty.
fn read_ascii_field(bytes: &[u8], offset: usize) -> Option<String> {
    let tail = bytes.get(offset..)?;
    let end = tail
        .iter()
        .position(|byte| *byte == 0 || !byte.is_ascii())
        .unwrap_or(tail.len());
    // Everything before `end` is ASCII, so UTF-8 conversion cannot fail here.
    let value = String::from_utf8(tail[..end].to_vec()).ok()?;
    (!value.is_empty()).then_some(value)
}
/// Splits one `.cct` row into its identifier and optional integer value; the
/// value is `None` when the second token is missing or not a valid i64.
fn parse_cct_row(line: &str) -> (Option<String>, Option<i64>) {
    let mut tokens = line.split_whitespace();
    let identifier = tokens.next().map(str::to_string);
    let value = match tokens.next() {
        Some(token) => token.parse::<i64>().ok(),
        None => None,
    };
    (identifier, value)
}
/// Decodes a Windows-1252 byte buffer into a `String`, one char per byte.
fn decode_windows_1252(bytes: &[u8]) -> String {
    let mut text = String::with_capacity(bytes.len());
    for &byte in bytes {
        text.push(decode_windows_1252_byte(byte));
    }
    text
}
/// Maps one Windows-1252 byte to its Unicode scalar.
///
/// Only 0x80..=0x9F differ from Latin-1; all other bytes map to the code
/// point of the same value. The unassigned C1 slots (0x81, 0x8D, 0x8F, 0x90,
/// 0x9D) fall through to their raw code points, matching the previous
/// behavior.
fn decode_windows_1252_byte(byte: u8) -> char {
    if !(0x80..=0x9F).contains(&byte) {
        return byte as char;
    }
    // Windows-1252 replacements for the C1 range, indexed by `byte - 0x80`.
    const C1_TABLE: [char; 32] = [
        '\u{20AC}', '\u{0081}', '\u{201A}', '\u{0192}', '\u{201E}', '\u{2026}', '\u{2020}',
        '\u{2021}', '\u{02C6}', '\u{2030}', '\u{0160}', '\u{2039}', '\u{0152}', '\u{008D}',
        '\u{017D}', '\u{008F}', '\u{0090}', '\u{2018}', '\u{2019}', '\u{201C}', '\u{201D}',
        '\u{2022}', '\u{2013}', '\u{2014}', '\u{02DC}', '\u{2122}', '\u{0161}', '\u{203A}',
        '\u{0153}', '\u{009D}', '\u{017E}', '\u{0178}',
    ];
    C1_TABLE[(byte - 0x80) as usize]
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Synthetic .car header: magic, record kind, and the three fixed name fields.
    #[test]
    fn parses_car_header_fields() {
        let mut bytes = vec![0u8; 0x90];
        bytes[0..4].copy_from_slice(&0x03eau32.to_le_bytes());
        bytes[4..8].copy_from_slice(&2u32.to_le_bytes());
        bytes[0x0c..0x0c + 6].copy_from_slice(b"2-D-2\0");
        bytes[0x48..0x48 + 5].copy_from_slice(b"2D2L\0");
        bytes[0x84..0x84 + 5].copy_from_slice(b"2D2L\0");
        let report = inspect_car_bytes(&bytes).expect("car should parse");
        assert_eq!(report.header_magic, Some(0x03ea));
        assert_eq!(report.primary_display_name.as_deref(), Some("2-D-2"));
        assert_eq!(report.internal_stem.as_deref(), Some("2D2L"));
    }

    /// Synthetic .lco header: magic, internal stem, and the first raw lane at 0x20.
    #[test]
    fn parses_lco_header_and_lanes() {
        let mut bytes = vec![0u8; 0x58];
        bytes[0..4].copy_from_slice(&0x07d5u32.to_le_bytes());
        bytes[4..4 + 5].copy_from_slice(b"2D2L\0");
        bytes[0x20..0x24].copy_from_slice(&100u32.to_le_bytes());
        let report = inspect_lco_bytes(&bytes).expect("lco should parse");
        assert_eq!(report.header_magic, Some(0x07d5));
        assert_eq!(report.internal_stem.as_deref(), Some("2D2L"));
        assert_eq!(report.early_lanes[0].raw_u32, 100);
    }

    /// Minimal .cgo (stem after leading dword) and one-row .cct text payloads.
    #[test]
    fn parses_cgo_and_cct_files() {
        let cgo = inspect_cgo_bytes(b"\x00\x00\\BAuto_Carrier\0")
            .expect("cgo should parse");
        assert_eq!(cgo.content_stem.as_deref(), Some("Auto_Carrier"));
        let cct = inspect_cct_bytes(b"Auto_Carrier 13\n").expect("cct should parse");
        assert_eq!(cct.identifier.as_deref(), Some("Auto_Carrier"));
        assert_eq!(cct.value, Some(13));
    }

    /// One matched pair whose display name is not in the unmatched-name list,
    /// so it must count as a grounded-prefix match in the census.
    #[test]
    fn builds_locomotive_display_census() {
        let mut car_reports = BTreeMap::new();
        car_reports.insert(
            "2D2L.car".to_string(),
            EngineTypeCarInspectionReport {
                file_size: 0,
                header_magic: Some(0x03ea),
                header_magic_hex: Some("0x000003ea".to_string()),
                record_kind: Some(2),
                record_kind_hex: Some("0x00000002".to_string()),
                primary_display_name: Some("2-D-2".to_string()),
                content_name: Some("2D2L".to_string()),
                internal_stem: Some("2D2L".to_string()),
                notes: Vec::new(),
            },
        );
        let families = vec![EngineTypeFamilyEntry {
            canonical_stem: "2d2l".to_string(),
            car_file: Some("2D2L.car".to_string()),
            lco_file: Some("2D2L.lco".to_string()),
            cgo_file: None,
            cct_file: None,
            primary_display_name: Some("2-D-2".to_string()),
            content_name: Some("2D2L".to_string()),
            internal_stem: Some("2D2L".to_string()),
            cct_identifier: None,
            cct_value: None,
            has_matched_locomotive_pair: true,
        }];
        let report =
            build_locomotive_display_census(Path::new("EngineTypes"), &families, &car_reports)
                .expect("census should build");
        assert_eq!(report.observed_locomotive_pair_count, 1);
        assert_eq!(report.entries[0].primary_display_name, "2-D-2");
        assert!(report.entries[0].matches_grounded_prefix_name);
    }
}

View file

@ -0,0 +1,148 @@
use std::fs;
use std::path::Path;
use serde::{Deserialize, Serialize};
/// One parsed key/value line from an `.imb` file.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ImbInspectionEntry {
    /// 1-based line number within the decoded text.
    pub line_number: usize,
    /// First whitespace-delimited token on the line.
    pub key: String,
    /// Remaining tokens re-joined with single spaces.
    pub raw_value: String,
    pub tokens: Vec<String>,
    /// Populated only when every value token parses as i64.
    pub integer_values: Option<Vec<i64>>,
    /// Populated only when every value token parses as f64.
    pub float_values: Option<Vec<f64>>,
}

/// Whole-file summary for one `.imb` text asset.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ImbInspectionReport {
    pub line_count: usize,
    pub entry_count: usize,
    pub blank_line_count: usize,
    pub malformed_line_count: usize,
    pub notes: Vec<String>,
    pub entries: Vec<ImbInspectionEntry>,
    /// Lines that had a key but no value tokens, preserved verbatim.
    pub malformed_lines: Vec<String>,
}
/// Reads an `.imb` file from disk and parses its key/value lines.
pub fn inspect_imb_file(path: &Path) -> Result<ImbInspectionReport, Box<dyn std::error::Error>> {
    inspect_imb_bytes(&fs::read(path)?)
}
/// Parses decoded `.imb` text: each non-blank line becomes a key plus a token
/// list, with optional all-integer / all-float projections of the value lane.
pub fn inspect_imb_bytes(bytes: &[u8]) -> Result<ImbInspectionReport, Box<dyn std::error::Error>> {
    let text = decode_windows_1252(bytes);
    let mut entries = Vec::new();
    let mut malformed_lines = Vec::new();
    let mut blank_line_count = 0usize;
    let mut line_count = 0usize;
    for raw_line in text.lines() {
        // Track the 1-based line number as we go instead of re-counting later.
        line_count += 1;
        let trimmed = raw_line.trim();
        if trimmed.is_empty() {
            blank_line_count += 1;
            continue;
        }
        let mut fields = trimmed.split_whitespace();
        let Some(key) = fields.next() else {
            blank_line_count += 1;
            continue;
        };
        let tokens: Vec<String> = fields.map(str::to_string).collect();
        if tokens.is_empty() {
            // A key with no value lane is preserved verbatim as malformed.
            malformed_lines.push(raw_line.to_string());
            continue;
        }
        entries.push(ImbInspectionEntry {
            line_number: line_count,
            key: key.to_string(),
            raw_value: tokens.join(" "),
            integer_values: parse_i64_tokens(&tokens),
            float_values: parse_f64_tokens(&tokens),
            tokens,
        });
    }
    Ok(ImbInspectionReport {
        line_count,
        entry_count: entries.len(),
        blank_line_count,
        malformed_line_count: malformed_lines.len(),
        notes: vec![
            "The current .imb parser preserves one whitespace-delimited key plus the remaining token list per line.".to_string(),
            "Integer and float projections are only populated when every token in the value lane parses cleanly.".to_string(),
        ],
        entries,
        malformed_lines,
    })
}
/// Parses every token as i64; `None` if any token fails to parse.
fn parse_i64_tokens(tokens: &[String]) -> Option<Vec<i64>> {
    let mut values = Vec::with_capacity(tokens.len());
    for token in tokens {
        values.push(token.parse::<i64>().ok()?);
    }
    Some(values)
}
/// Parses every token as f64; `None` if any token fails to parse.
fn parse_f64_tokens(tokens: &[String]) -> Option<Vec<f64>> {
    let mut values = Vec::with_capacity(tokens.len());
    for token in tokens {
        values.push(token.parse::<f64>().ok()?);
    }
    Some(values)
}
/// Decodes a Windows-1252 byte buffer into a `String`, one char per byte.
fn decode_windows_1252(bytes: &[u8]) -> String {
    let mut text = String::with_capacity(bytes.len());
    for &byte in bytes {
        text.push(decode_windows_1252_byte(byte));
    }
    text
}
/// Maps one Windows-1252 byte to its Unicode scalar.
///
/// Only 0x80..=0x9F differ from Latin-1; all other bytes map to the code
/// point of the same value. The unassigned C1 slots (0x81, 0x8D, 0x8F, 0x90,
/// 0x9D) fall through to their raw code points, matching the previous
/// behavior.
fn decode_windows_1252_byte(byte: u8) -> char {
    if !(0x80..=0x9F).contains(&byte) {
        return byte as char;
    }
    // Windows-1252 replacements for the C1 range, indexed by `byte - 0x80`.
    const C1_TABLE: [char; 32] = [
        '\u{20AC}', '\u{0081}', '\u{201A}', '\u{0192}', '\u{201E}', '\u{2026}', '\u{2020}',
        '\u{2021}', '\u{02C6}', '\u{2030}', '\u{0160}', '\u{2039}', '\u{0152}', '\u{008D}',
        '\u{017D}', '\u{008F}', '\u{0090}', '\u{2018}', '\u{2019}', '\u{201C}', '\u{201D}',
        '\u{2022}', '\u{2013}', '\u{2014}', '\u{02DC}', '\u{2122}', '\u{0161}', '\u{203A}',
        '\u{0153}', '\u{009D}', '\u{017E}', '\u{0178}',
    ];
    C1_TABLE[(byte - 0x80) as usize]
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A string-valued line, a single-integer line, and a 4-integer tuple line.
    #[test]
    fn parses_scalar_and_tuple_lines() {
        let report = inspect_imb_bytes(
            b"TGAName ICE_Profile\nTGAWidth 256\nImageWH 0 0 138 32\n",
        )
        .expect("imb should parse");
        assert_eq!(report.entry_count, 3);
        assert_eq!(report.entries[0].key, "TGAName");
        assert_eq!(report.entries[1].integer_values, Some(vec![256]));
        assert_eq!(report.entries[2].integer_values, Some(vec![0, 0, 138, 32]));
    }
}

View file

@ -0,0 +1,270 @@
use std::collections::{BTreeMap, BTreeSet};
use std::fs;
use std::path::Path;
use serde::{Deserialize, Serialize};
/// One parsed row from a `.lng` file: either a quoted string-id row
/// (`kind == "string"`) or a styled credits row (`kind == "styled"`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct LngInspectionEntry {
    /// 1-based line number within the decoded text.
    pub line_number: usize,
    /// "string" or "styled", depending on which parser accepted the row.
    pub kind: String,
    /// Set for "string" rows only.
    pub string_id: Option<u32>,
    /// Set for "styled" rows only (the digits after the leading `*`).
    pub style_level: Option<u32>,
    /// Payload with escape sequences left as written in the file.
    pub raw_text: String,
    /// Payload with the two-character `\n` escape expanded to a line break.
    pub normalized_text: String,
}

/// A line that matched neither row format, preserved with its reason.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct LngMalformedLine {
    pub line_number: usize,
    pub raw_line: String,
    pub reason: String,
}

/// Whole-file summary for one `.lng` language asset.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct LngInspectionReport {
    /// Classification of the file based on which row kinds were observed:
    /// quoted-string-table, styled-credits-lines, mixed, or unclassified.
    pub format_family: String,
    pub line_count: usize,
    pub entry_count: usize,
    pub string_entry_count: usize,
    pub styled_entry_count: usize,
    /// Lines beginning with ';'.
    pub comment_count: usize,
    pub blank_line_count: usize,
    pub duplicate_id_count: usize,
    /// String ids that appeared on more than one row.
    pub duplicate_ids: Vec<u32>,
    pub malformed_line_count: usize,
    pub highest_string_id: Option<u32>,
    pub notes: Vec<String>,
    pub entries: Vec<LngInspectionEntry>,
    pub malformed_lines: Vec<LngMalformedLine>,
}
/// Reads a `.lng` file from disk and parses its language rows.
pub fn inspect_lng_file(path: &Path) -> Result<LngInspectionReport, Box<dyn std::error::Error>> {
    inspect_lng_bytes(&fs::read(path)?)
}
/// Parses decoded `.lng` text into string-table / styled-credits entries and
/// per-file statistics, classifying the file by which row kinds it contains.
pub fn inspect_lng_bytes(bytes: &[u8]) -> Result<LngInspectionReport, Box<dyn std::error::Error>> {
    let text = decode_windows_1252(bytes);
    let mut entries = Vec::new();
    let mut malformed_lines = Vec::new();
    // Occurrence count per string id, used to report duplicates afterwards.
    let mut string_id_counts = BTreeMap::<u32, usize>::new();
    let mut comment_count = 0usize;
    let mut blank_line_count = 0usize;
    let mut string_entry_count = 0usize;
    let mut styled_entry_count = 0usize;
    for (index, raw_line) in text.lines().enumerate() {
        let line_number = index + 1;
        let trimmed = raw_line.trim();
        if trimmed.is_empty() {
            blank_line_count += 1;
            continue;
        }
        // Lines starting with ';' are comments.
        if trimmed.starts_with(';') {
            comment_count += 1;
            continue;
        }
        // Classification order matters: quoted string-id rows are tried first,
        // then styled credits rows; anything left over is malformed.
        if let Some(entry) = parse_string_entry(line_number, raw_line) {
            string_entry_count += 1;
            if let Some(string_id) = entry.string_id {
                *string_id_counts.entry(string_id).or_default() += 1;
            }
            entries.push(entry);
            continue;
        }
        if let Some(entry) = parse_styled_entry(line_number, raw_line) {
            styled_entry_count += 1;
            entries.push(entry);
            continue;
        }
        malformed_lines.push(LngMalformedLine {
            line_number,
            raw_line: raw_line.to_string(),
            reason: "line is neither a quoted string-id row nor a styled credits row".to_string(),
        });
    }
    let duplicate_ids = string_id_counts
        .into_iter()
        .filter_map(|(string_id, count)| (count > 1).then_some(string_id))
        .collect::<Vec<_>>();
    let highest_string_id = entries.iter().filter_map(|entry| entry.string_id).max();
    // Classify the file by the set of row kinds actually observed.
    let format_kinds = entries
        .iter()
        .map(|entry| entry.kind.as_str())
        .collect::<BTreeSet<_>>();
    let format_family = match (format_kinds.contains("string"), format_kinds.contains("styled")) {
        (true, false) => "quoted-string-table".to_string(),
        (false, true) => "styled-credits-lines".to_string(),
        (true, true) => "mixed-language-table".to_string(),
        (false, false) => "unclassified-language-text".to_string(),
    };
    let mut notes = Vec::new();
    notes.push(
        "Quoted string rows preserve both the raw escape spelling and a normalized text view where `\\n` becomes a line break.".to_string(),
    );
    if format_kinds.contains("styled") {
        notes.push(
            "Styled rows use the observed `*<level>` credits format and preserve the style level separately from the rendered text.".to_string(),
        );
    }
    if !duplicate_ids.is_empty() {
        notes.push("Duplicate string ids are preserved explicitly instead of silently overwriting earlier rows.".to_string());
    }
    Ok(LngInspectionReport {
        format_family,
        line_count: text.lines().count(),
        entry_count: entries.len(),
        string_entry_count,
        styled_entry_count,
        comment_count,
        blank_line_count,
        duplicate_id_count: duplicate_ids.len(),
        duplicate_ids,
        malformed_line_count: malformed_lines.len(),
        highest_string_id,
        notes,
        entries,
        malformed_lines,
    })
}
/// Parses one `<id> "<text>"` row; `None` when the line does not start with
/// digits or the remainder is not a quoted payload.
fn parse_string_entry(line_number: usize, raw_line: &str) -> Option<LngInspectionEntry> {
    let trimmed = raw_line.trim_start();
    // Byte index of the first non-digit character (digits are 1 byte each).
    let id_end = trimmed
        .find(|ch: char| !ch.is_ascii_digit())
        .unwrap_or(trimmed.len());
    if id_end == 0 {
        return None;
    }
    let string_id: u32 = trimmed[..id_end].parse().ok()?;
    let payload = parse_quoted_payload(trimmed[id_end..].trim_start())?;
    Some(LngInspectionEntry {
        line_number,
        kind: "string".to_string(),
        string_id: Some(string_id),
        style_level: None,
        normalized_text: normalize_lng_text(&payload),
        raw_text: payload,
    })
}
/// Parses one `*<level><text>` credits row; `None` when the line lacks the
/// leading `*` or the digits after it.
fn parse_styled_entry(line_number: usize, raw_line: &str) -> Option<LngInspectionEntry> {
    let rest = raw_line.trim_start().strip_prefix('*')?;
    // Byte index of the first non-digit character (digits are 1 byte each).
    let level_end = rest
        .find(|ch: char| !ch.is_ascii_digit())
        .unwrap_or(rest.len());
    if level_end == 0 {
        return None;
    }
    let style_level: u32 = rest[..level_end].parse().ok()?;
    let raw_text = rest[level_end..].trim_start().to_string();
    Some(LngInspectionEntry {
        line_number,
        kind: "styled".to_string(),
        string_id: None,
        style_level: Some(style_level),
        normalized_text: normalize_lng_text(&raw_text),
        raw_text,
    })
}
/// Strips a surrounding pair of double quotes from a trimmed payload; `None`
/// when the text is not fully quoted (a lone `"` does not count as a pair).
fn parse_quoted_payload(text: &str) -> Option<String> {
    let trimmed = text.trim();
    if trimmed.len() < 2 {
        return None;
    }
    let inner = trimmed.strip_prefix('"')?.strip_suffix('"')?;
    Some(inner.to_string())
}
/// Expands the two-character escape `\n` into a real line break.
fn normalize_lng_text(text: &str) -> String {
    text.split("\\n").collect::<Vec<_>>().join("\n")
}
/// Decodes a Windows-1252 byte buffer into a `String`, one char per byte.
fn decode_windows_1252(bytes: &[u8]) -> String {
    let mut text = String::with_capacity(bytes.len());
    for &byte in bytes {
        text.push(decode_windows_1252_byte(byte));
    }
    text
}
/// Maps one Windows-1252 byte to its Unicode scalar.
///
/// Only 0x80..=0x9F differ from Latin-1; all other bytes map to the code
/// point of the same value. The unassigned C1 slots (0x81, 0x8D, 0x8F, 0x90,
/// 0x9D) fall through to their raw code points, matching the previous
/// behavior.
fn decode_windows_1252_byte(byte: u8) -> char {
    if !(0x80..=0x9F).contains(&byte) {
        return byte as char;
    }
    // Windows-1252 replacements for the C1 range, indexed by `byte - 0x80`.
    const C1_TABLE: [char; 32] = [
        '\u{20AC}', '\u{0081}', '\u{201A}', '\u{0192}', '\u{201E}', '\u{2026}', '\u{2020}',
        '\u{2021}', '\u{02C6}', '\u{2030}', '\u{0160}', '\u{2039}', '\u{0152}', '\u{008D}',
        '\u{017D}', '\u{008F}', '\u{0090}', '\u{2018}', '\u{2019}', '\u{201C}', '\u{201D}',
        '\u{2022}', '\u{2013}', '\u{2014}', '\u{02DC}', '\u{2122}', '\u{0161}', '\u{203A}',
        '\u{0153}', '\u{009D}', '\u{017E}', '\u{0178}',
    ];
    C1_TABLE[(byte - 0x80) as usize]
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Comment line plus two quoted rows (one with a `\n` escape).
    #[test]
    fn parses_standard_string_rows_and_comments() {
        let report = inspect_lng_bytes(b"; comment\n 10 \"Cancel\"\n11\t\"Line\\nBreak\"\n")
            .expect("lng should parse");
        assert_eq!(report.format_family, "quoted-string-table");
        assert_eq!(report.comment_count, 1);
        assert_eq!(report.string_entry_count, 2);
        assert_eq!(report.highest_string_id, Some(11));
        assert_eq!(report.entries[1].normalized_text, "Line\nBreak");
    }

    /// Two styled `*<level>` rows; the bare third line must count as malformed.
    #[test]
    fn parses_styled_credit_rows() {
        let report = inspect_lng_bytes(b"*3Railroad Tycoon 3\n*2Development\nPopTop\n")
            .expect("lng should parse");
        assert_eq!(report.format_family, "styled-credits-lines");
        assert_eq!(report.styled_entry_count, 2);
        assert_eq!(report.malformed_line_count, 1);
        assert_eq!(report.entries[0].style_level, Some(3));
        assert_eq!(report.entries[0].raw_text, "Railroad Tycoon 3");
    }

    /// The same id on two rows must be reported, not silently overwritten.
    #[test]
    fn reports_duplicate_string_ids() {
        let report = inspect_lng_bytes(b"1 \"A\"\n1 \"B\"\n").expect("lng should parse");
        assert_eq!(report.duplicate_id_count, 1);
        assert_eq!(report.duplicate_ids, vec![1]);
    }

    /// Byte 0x85 must decode as the Windows-1252 ellipsis, not Latin-1 C1.
    #[test]
    fn decodes_windows_1252_text() {
        let report = inspect_lng_bytes(b"1 \"Wait\x85\"\n").expect("lng should parse");
        assert_eq!(report.entries[0].raw_text, "Wait…");
    }
}