270 lines
8.5 KiB
Rust
270 lines
8.5 KiB
Rust
use std::collections::{BTreeMap, BTreeSet};
|
|
use std::fs;
|
|
use std::path::Path;
|
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
|
pub struct LngInspectionEntry {
|
|
pub line_number: usize,
|
|
pub kind: String,
|
|
pub string_id: Option<u32>,
|
|
pub style_level: Option<u32>,
|
|
pub raw_text: String,
|
|
pub normalized_text: String,
|
|
}
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
|
pub struct LngMalformedLine {
|
|
pub line_number: usize,
|
|
pub raw_line: String,
|
|
pub reason: String,
|
|
}
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
|
pub struct LngInspectionReport {
|
|
pub format_family: String,
|
|
pub line_count: usize,
|
|
pub entry_count: usize,
|
|
pub string_entry_count: usize,
|
|
pub styled_entry_count: usize,
|
|
pub comment_count: usize,
|
|
pub blank_line_count: usize,
|
|
pub duplicate_id_count: usize,
|
|
pub duplicate_ids: Vec<u32>,
|
|
pub malformed_line_count: usize,
|
|
pub highest_string_id: Option<u32>,
|
|
pub notes: Vec<String>,
|
|
pub entries: Vec<LngInspectionEntry>,
|
|
pub malformed_lines: Vec<LngMalformedLine>,
|
|
}
|
|
|
|
pub fn inspect_lng_file(path: &Path) -> Result<LngInspectionReport, Box<dyn std::error::Error>> {
|
|
let bytes = fs::read(path)?;
|
|
inspect_lng_bytes(&bytes)
|
|
}
|
|
|
|
pub fn inspect_lng_bytes(bytes: &[u8]) -> Result<LngInspectionReport, Box<dyn std::error::Error>> {
|
|
let text = decode_windows_1252(bytes);
|
|
let mut entries = Vec::new();
|
|
let mut malformed_lines = Vec::new();
|
|
let mut string_id_counts = BTreeMap::<u32, usize>::new();
|
|
let mut comment_count = 0usize;
|
|
let mut blank_line_count = 0usize;
|
|
let mut string_entry_count = 0usize;
|
|
let mut styled_entry_count = 0usize;
|
|
|
|
for (index, raw_line) in text.lines().enumerate() {
|
|
let line_number = index + 1;
|
|
let trimmed = raw_line.trim();
|
|
if trimmed.is_empty() {
|
|
blank_line_count += 1;
|
|
continue;
|
|
}
|
|
if trimmed.starts_with(';') {
|
|
comment_count += 1;
|
|
continue;
|
|
}
|
|
|
|
if let Some(entry) = parse_string_entry(line_number, raw_line) {
|
|
string_entry_count += 1;
|
|
if let Some(string_id) = entry.string_id {
|
|
*string_id_counts.entry(string_id).or_default() += 1;
|
|
}
|
|
entries.push(entry);
|
|
continue;
|
|
}
|
|
if let Some(entry) = parse_styled_entry(line_number, raw_line) {
|
|
styled_entry_count += 1;
|
|
entries.push(entry);
|
|
continue;
|
|
}
|
|
|
|
malformed_lines.push(LngMalformedLine {
|
|
line_number,
|
|
raw_line: raw_line.to_string(),
|
|
reason: "line is neither a quoted string-id row nor a styled credits row".to_string(),
|
|
});
|
|
}
|
|
|
|
let duplicate_ids = string_id_counts
|
|
.into_iter()
|
|
.filter_map(|(string_id, count)| (count > 1).then_some(string_id))
|
|
.collect::<Vec<_>>();
|
|
let highest_string_id = entries.iter().filter_map(|entry| entry.string_id).max();
|
|
|
|
let format_kinds = entries
|
|
.iter()
|
|
.map(|entry| entry.kind.as_str())
|
|
.collect::<BTreeSet<_>>();
|
|
let format_family = match (format_kinds.contains("string"), format_kinds.contains("styled")) {
|
|
(true, false) => "quoted-string-table".to_string(),
|
|
(false, true) => "styled-credits-lines".to_string(),
|
|
(true, true) => "mixed-language-table".to_string(),
|
|
(false, false) => "unclassified-language-text".to_string(),
|
|
};
|
|
|
|
let mut notes = Vec::new();
|
|
notes.push(
|
|
"Quoted string rows preserve both the raw escape spelling and a normalized text view where `\\n` becomes a line break.".to_string(),
|
|
);
|
|
if format_kinds.contains("styled") {
|
|
notes.push(
|
|
"Styled rows use the observed `*<level>` credits format and preserve the style level separately from the rendered text.".to_string(),
|
|
);
|
|
}
|
|
if !duplicate_ids.is_empty() {
|
|
notes.push("Duplicate string ids are preserved explicitly instead of silently overwriting earlier rows.".to_string());
|
|
}
|
|
|
|
Ok(LngInspectionReport {
|
|
format_family,
|
|
line_count: text.lines().count(),
|
|
entry_count: entries.len(),
|
|
string_entry_count,
|
|
styled_entry_count,
|
|
comment_count,
|
|
blank_line_count,
|
|
duplicate_id_count: duplicate_ids.len(),
|
|
duplicate_ids,
|
|
malformed_line_count: malformed_lines.len(),
|
|
highest_string_id,
|
|
notes,
|
|
entries,
|
|
malformed_lines,
|
|
})
|
|
}
|
|
|
|
fn parse_string_entry(line_number: usize, raw_line: &str) -> Option<LngInspectionEntry> {
|
|
let trimmed = raw_line.trim_start();
|
|
let digit_len = trimmed.chars().take_while(|ch| ch.is_ascii_digit()).count();
|
|
if digit_len == 0 {
|
|
return None;
|
|
}
|
|
let string_id = trimmed[..digit_len].parse().ok()?;
|
|
let remainder = trimmed[digit_len..].trim_start();
|
|
let raw_text = parse_quoted_payload(remainder)?;
|
|
Some(LngInspectionEntry {
|
|
line_number,
|
|
kind: "string".to_string(),
|
|
string_id: Some(string_id),
|
|
style_level: None,
|
|
normalized_text: normalize_lng_text(&raw_text),
|
|
raw_text,
|
|
})
|
|
}
|
|
|
|
fn parse_styled_entry(line_number: usize, raw_line: &str) -> Option<LngInspectionEntry> {
|
|
let trimmed = raw_line.trim_start();
|
|
let remainder = trimmed.strip_prefix('*')?;
|
|
let digit_len = remainder
|
|
.chars()
|
|
.take_while(|ch| ch.is_ascii_digit())
|
|
.count();
|
|
if digit_len == 0 {
|
|
return None;
|
|
}
|
|
let style_level = remainder[..digit_len].parse().ok()?;
|
|
let raw_text = remainder[digit_len..].trim_start().to_string();
|
|
Some(LngInspectionEntry {
|
|
line_number,
|
|
kind: "styled".to_string(),
|
|
string_id: None,
|
|
style_level: Some(style_level),
|
|
normalized_text: normalize_lng_text(&raw_text),
|
|
raw_text,
|
|
})
|
|
}
|
|
|
|
/// Extracts the text between the outermost double quotes of `text`.
///
/// Surrounding whitespace is ignored. Returns `None` unless the trimmed input
/// both starts and ends with `"` using two distinct quote characters; inner
/// quotes are kept verbatim.
fn parse_quoted_payload(text: &str) -> Option<String> {
    // Stripping the prefix first leaves nothing for `strip_suffix` to match
    // when the input is a lone `"`, so that case is rejected as well.
    let inner = text.trim().strip_prefix('"')?.strip_suffix('"')?;
    Some(inner.to_owned())
}
|
|
|
|
/// Returns `text` with every literal backslash-`n` escape replaced by a real
/// line break; all other characters are copied through unchanged.
fn normalize_lng_text(text: &str) -> String {
    let segments: Vec<&str> = text.split("\\n").collect();
    segments.join("\n")
}
|
|
|
|
fn decode_windows_1252(bytes: &[u8]) -> String {
|
|
bytes.iter().map(|byte| decode_windows_1252_byte(*byte)).collect()
|
|
}
|
|
|
|
/// Maps a single Windows-1252 byte to its Unicode character.
///
/// Only the range `0x80..=0x9F` needs a lookup. Every other byte, and the five
/// bytes in that range without an override (`0x81`, `0x8D`, `0x8F`, `0x90`,
/// `0x9D`), maps to the code point with the same numeric value.
fn decode_windows_1252_byte(byte: u8) -> char {
    // Indexed by `byte - 0x80`; entries equal to their own byte value are the
    // positions that have no dedicated override.
    const HIGH_BLOCK: [char; 32] = [
        // 0x80..=0x87
        '\u{20AC}', '\u{0081}', '\u{201A}', '\u{0192}', '\u{201E}', '\u{2026}', '\u{2020}', '\u{2021}',
        // 0x88..=0x8F
        '\u{02C6}', '\u{2030}', '\u{0160}', '\u{2039}', '\u{0152}', '\u{008D}', '\u{017D}', '\u{008F}',
        // 0x90..=0x97
        '\u{0090}', '\u{2018}', '\u{2019}', '\u{201C}', '\u{201D}', '\u{2022}', '\u{2013}', '\u{2014}',
        // 0x98..=0x9F
        '\u{02DC}', '\u{2122}', '\u{0161}', '\u{203A}', '\u{0153}', '\u{009D}', '\u{017E}', '\u{0178}',
    ];

    match byte {
        0x80..=0x9F => HIGH_BLOCK[usize::from(byte - 0x80)],
        _ => char::from(byte),
    }
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Inspects an in-memory fixture, failing the test if inspection errors.
    fn inspect(fixture: &[u8]) -> LngInspectionReport {
        inspect_lng_bytes(fixture).expect("lng should parse")
    }

    /// Comment lines are counted, and quoted rows are parsed with their
    /// `\n` escapes normalized into line breaks.
    #[test]
    fn parses_standard_string_rows_and_comments() {
        let report = inspect(b"; comment\n 10 \"Cancel\"\n11\t\"Line\\nBreak\"\n");

        assert_eq!(report.format_family, "quoted-string-table");
        assert_eq!(report.comment_count, 1);
        assert_eq!(report.string_entry_count, 2);
        assert_eq!(report.highest_string_id, Some(11));
        assert_eq!(report.entries[1].normalized_text, "Line\nBreak");
    }

    /// `*<level>` rows are parsed as styled entries; a bare text line is
    /// reported as malformed.
    #[test]
    fn parses_styled_credit_rows() {
        let report = inspect(b"*3Railroad Tycoon 3\n*2Development\nPopTop\n");

        assert_eq!(report.format_family, "styled-credits-lines");
        assert_eq!(report.styled_entry_count, 2);
        assert_eq!(report.malformed_line_count, 1);

        let first = &report.entries[0];
        assert_eq!(first.style_level, Some(3));
        assert_eq!(first.raw_text, "Railroad Tycoon 3");
    }

    /// An id used by two rows is listed once in the duplicate summary.
    #[test]
    fn reports_duplicate_string_ids() {
        let report = inspect(b"1 \"A\"\n1 \"B\"\n");

        assert_eq!(report.duplicate_id_count, 1);
        assert_eq!(report.duplicate_ids, vec![1]);
    }

    /// Byte 0x85 is decoded as the horizontal ellipsis character.
    #[test]
    fn decodes_windows_1252_text() {
        let report = inspect(b"1 \"Wait\x85\"\n");

        assert_eq!(report.entries[0].raw_text, "Wait…");
    }
}
|