Probe map title hints for kind-8 carriers
This commit is contained in:
parent
7463d87e0e
commit
b9054b1780
6 changed files with 655 additions and 18 deletions
|
|
@ -117,7 +117,8 @@ pub use smp::{
|
|||
SmpLoadedWorldEconomicTuningState, SmpLoadedWorldFinanceNeighborhoodState,
|
||||
SmpLoadedWorldIssue37State, SmpLocomotivePolicyFieldObservation,
|
||||
SmpLocomotivePolicyFloatAlignmentCandidate, SmpLocomotivePolicyNeighborhoodProbe,
|
||||
SmpPackedProfileWordLane, SmpPeriodicCompanyServiceTraceReport,
|
||||
SmpMapTitleHintAdjacentPair, SmpMapTitleHintMapReference, SmpMapTitleHintProbe,
|
||||
SmpMapTitleHintTitleHit, SmpPackedProfileWordLane, SmpPeriodicCompanyServiceTraceReport,
|
||||
SmpPostSpecialConditionsScalarLane, SmpPostSpecialConditionsScalarProbe,
|
||||
SmpPostTextFieldNeighborhoodProbe, SmpPostTextFloatAlignmentCandidate,
|
||||
SmpPostTextGroundedFieldObservation, SmpPreRecipeScalarPlateauLane,
|
||||
|
|
@ -139,7 +140,8 @@ pub use smp::{
|
|||
SmpSaveWorldSelectionRoleAnalysis, SmpSaveWorldSelectionRoleAnalysisEntry,
|
||||
SmpSecondaryVariantProbe, SmpServiceTraceBranchStatus, SmpSharedHeader,
|
||||
SmpSpecialConditionEntry, SmpSpecialConditionsProbe,
|
||||
compare_save_region_fixed_row_run_candidates, inspect_save_company_and_chairman_analysis_bytes,
|
||||
compare_save_region_fixed_row_run_candidates, inspect_map_title_hint_bytes,
|
||||
inspect_map_title_hint_file, inspect_save_company_and_chairman_analysis_bytes,
|
||||
inspect_save_company_and_chairman_analysis_file, inspect_save_infrastructure_asset_trace_file,
|
||||
inspect_save_periodic_company_service_trace_file,
|
||||
inspect_save_placed_structure_dynamic_side_buffer_file,
|
||||
|
|
|
|||
|
|
@ -175,6 +175,24 @@ const PACKED_EVENT_REAL_COMPACT_CONTROL_LEN: usize = 37;
|
|||
const PACKED_EVENT_NONDIRECT_CONDITION_ROW_SERIALIZED_LEN: usize = 22;
|
||||
const PACKED_EVENT_NONDIRECT_GROUPED_EFFECT_ROW_SERIALIZED_LEN: usize = 45;
|
||||
const PACKED_EVENT_NONDIRECT_OPTIONAL_NAME_BLOCK_LEN: usize = 0x64;
|
||||
// Upper bound, in bytes, on the text captured for a single embedded ASCII
// fragment; longer runs are cut off after this many bytes.
const MAP_TITLE_HINT_ASCII_FRAGMENT_MAX_LEN: usize = 160;
|
||||
// Largest byte distance between a map reference and a title hit for the two
// to still be reported as an adjacent pair (the limit itself is inclusive).
const MAP_TITLE_HINT_REFERENCE_PAIR_DISTANCE_LIMIT: usize = 0x100;
|
||||
// Scenario titles whose literal bytes the map title hint probe searches for.
// Hits are reported in this order, so reordering the list reorders the output.
// NOTE(review): the name suggests these are the titles covered by a post-load
// scenario fixup; confirm against the code that consumes that fixup.
const POST_LOAD_SCENARIO_FIXUP_TITLE_SET: [&str; 14] = [
|
||||
"Go West!",
|
||||
"Germany",
|
||||
"France",
|
||||
"State of Germany",
|
||||
"New Beginnings",
|
||||
"Dutchlantis",
|
||||
"Britain",
|
||||
"New Zealand",
|
||||
"South East Australia",
|
||||
"Tex-Mex",
|
||||
"Germantown",
|
||||
"The American",
|
||||
"Central Pacific",
|
||||
"Orient Express",
|
||||
|
||||
];
|
||||
const PACKED_EVENT_TEXT_BAND_LABELS: [&str; 6] = [
|
||||
"primary_text_band",
|
||||
"secondary_text_band_0",
|
||||
|
|
@ -1306,6 +1324,38 @@ pub struct SmpAsciiPreview {
|
|||
pub truncated: bool,
|
||||
}
|
||||
|
||||
/// One known scenario title whose literal bytes were found in the scanned
/// buffer.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct SmpMapTitleHintTitleHit {
|
||||
/// The title exactly as it is spelled in the known-title list.
pub title: String,
|
||||
/// Byte offset of the first occurrence of `title` in the buffer. The match
/// is a plain substring search, so it may lie inside a longer string.
pub earliest_offset: usize,
|
||||
}
|
||||
|
||||
/// An ASCII fragment in the scanned buffer that contains a `.gmp` map file
/// reference.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct SmpMapTitleHintMapReference {
|
||||
/// Byte offset at which the surrounding ASCII fragment starts.
pub offset: usize,
|
||||
/// Whitespace-trimmed text of the fragment, capped at the fragment length
/// limit.
pub text: String,
|
||||
}
|
||||
|
||||
/// A map reference paired with the best title hit found within the pairing
/// distance limit.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct SmpMapTitleHintAdjacentPair {
|
||||
/// Offset of the map reference fragment.
pub map_reference_offset: usize,
|
||||
/// Text of the map reference fragment.
pub map_reference_text: String,
|
||||
/// Earliest offset of the paired title.
pub title_offset: usize,
|
||||
/// The paired title.
pub title: String,
|
||||
/// Absolute difference between `map_reference_offset` and `title_offset`.
pub byte_distance: usize,
|
||||
/// `true` when the reference text and the title normalize to the same stem.
pub normalized_stem_match: bool,
|
||||
}
|
||||
|
||||
/// Result of scanning a map container for known scenario titles and embedded
/// `.gmp` references.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct SmpMapTitleHintProbe {
|
||||
/// Label for how the hints were obtained; the parser in this file always
/// writes `"grounded-title-string-scan"`.
pub source_kind: String,
|
||||
/// Profile family copied from the container profile, when one was supplied.
pub profile_family: Option<String>,
|
||||
/// Known titles found in the buffer, in known-title-list order.
pub grounded_title_hits: Vec<SmpMapTitleHintTitleHit>,
|
||||
/// ASCII fragments that contain a `.gmp` reference, in ascending offset
/// order.
pub embedded_map_references: Vec<SmpMapTitleHintMapReference>,
|
||||
/// At most one pair per map reference, sorted with same-stem pairs first,
/// then by byte distance, then by reference offset.
pub adjacent_reference_title_pairs: Vec<SmpMapTitleHintAdjacentPair>,
|
||||
/// First entry of `adjacent_reference_title_pairs` whose stems match, if
/// any.
pub strongest_same_stem_pair: Option<SmpMapTitleHintAdjacentPair>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct SmpSharedHeader {
|
||||
pub byte_len: usize,
|
||||
|
|
@ -4249,6 +4299,8 @@ pub struct SmpInspectionReport {
|
|||
pub save_company_roster_probe: Option<SmpLoadedCompanyRoster>,
|
||||
#[serde(default)]
|
||||
pub save_chairman_profile_table_probe: Option<SmpLoadedChairmanProfileTable>,
|
||||
#[serde(default)]
|
||||
pub map_title_hint_probe: Option<SmpMapTitleHintProbe>,
|
||||
pub rt3_105_save_name_table_probe: Option<SmpRt3105SaveNameTableProbe>,
|
||||
pub rt3_105_save_named_locomotive_availability_probe:
|
||||
Option<SmpRt3105SaveNamedLocomotiveAvailabilityProbe>,
|
||||
|
|
@ -4279,6 +4331,41 @@ pub fn inspect_smp_file(path: &Path) -> Result<SmpInspectionReport, Box<dyn std:
|
|||
))
|
||||
}
|
||||
|
||||
pub fn inspect_map_title_hint_file(
|
||||
path: &Path,
|
||||
) -> Result<Option<SmpMapTitleHintProbe>, Box<dyn std::error::Error>> {
|
||||
let bytes = fs::read(path)?;
|
||||
let file_extension_hint = path
|
||||
.extension()
|
||||
.and_then(|extension| extension.to_str())
|
||||
.map(|extension| extension.to_ascii_lowercase());
|
||||
Ok(inspect_map_title_hint_bytes(
|
||||
&bytes,
|
||||
file_extension_hint.as_deref(),
|
||||
))
|
||||
}
|
||||
|
||||
pub fn inspect_map_title_hint_bytes(
|
||||
bytes: &[u8],
|
||||
file_extension_hint: Option<&str>,
|
||||
) -> Option<SmpMapTitleHintProbe> {
|
||||
let shared_header = parse_shared_header(bytes);
|
||||
let header_variant_probe = shared_header.as_ref().map(classify_header_variant_probe);
|
||||
let first_ascii_run = find_first_ascii_run(bytes);
|
||||
let early_content_probe = first_ascii_run
|
||||
.as_ref()
|
||||
.and_then(|ascii_run| probe_early_content_layout(bytes, ascii_run));
|
||||
let secondary_variant_probe = early_content_probe
|
||||
.as_ref()
|
||||
.and_then(classify_secondary_variant_probe);
|
||||
let container_profile = classify_container_profile(
|
||||
file_extension_hint,
|
||||
header_variant_probe.as_ref(),
|
||||
secondary_variant_probe.as_ref(),
|
||||
);
|
||||
parse_map_title_hint_probe(bytes, file_extension_hint, container_profile.as_ref())
|
||||
}
|
||||
|
||||
pub fn inspect_unclassified_save_collection_headers_file(
|
||||
path: &Path,
|
||||
) -> Result<Vec<SmpSaveUnclassifiedTaggedCollectionHeaderProbe>, Box<dyn std::error::Error>> {
|
||||
|
|
@ -13447,6 +13534,11 @@ fn inspect_bundle_bytes(bytes: &[u8], file_extension_hint: Option<String>) -> Sm
|
|||
save_world_selection_context_probe.as_ref(),
|
||||
save_company_collection_header_probe.as_ref(),
|
||||
);
|
||||
let map_title_hint_probe = parse_map_title_hint_probe(
|
||||
bytes,
|
||||
file_extension_hint.as_deref(),
|
||||
container_profile.as_ref(),
|
||||
);
|
||||
let rt3_105_save_name_table_probe = parse_rt3_105_save_name_table_probe(
|
||||
bytes,
|
||||
file_extension_hint.as_deref(),
|
||||
|
|
@ -13611,6 +13703,7 @@ fn inspect_bundle_bytes(bytes: &[u8], file_extension_hint: Option<String>) -> Sm
|
|||
save_unclassified_tagged_collection_header_probes,
|
||||
save_company_roster_probe,
|
||||
save_chairman_profile_table_probe,
|
||||
map_title_hint_probe,
|
||||
rt3_105_save_name_table_probe,
|
||||
rt3_105_save_named_locomotive_availability_probe,
|
||||
special_conditions_probe,
|
||||
|
|
@ -21358,6 +21451,173 @@ fn find_first_ascii_run(bytes: &[u8]) -> Option<SmpAsciiPreview> {
|
|||
})
|
||||
}
|
||||
|
||||
fn parse_map_title_hint_probe(
|
||||
bytes: &[u8],
|
||||
file_extension_hint: Option<&str>,
|
||||
container_profile: Option<&SmpContainerProfile>,
|
||||
) -> Option<SmpMapTitleHintProbe> {
|
||||
if file_extension_hint != Some("gmp") {
|
||||
return None;
|
||||
}
|
||||
|
||||
let grounded_title_hits = POST_LOAD_SCENARIO_FIXUP_TITLE_SET
|
||||
.iter()
|
||||
.filter_map(|title| {
|
||||
let offset = find_first_subsequence_offset(bytes, title.as_bytes())?;
|
||||
Some(SmpMapTitleHintTitleHit {
|
||||
title: (*title).to_string(),
|
||||
earliest_offset: offset,
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let embedded_map_references = find_ascii_fragment_occurrences_with_suffix(bytes, ".gmp")
|
||||
.into_iter()
|
||||
.map(|(offset, text)| SmpMapTitleHintMapReference { offset, text })
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let adjacent_reference_title_pairs =
|
||||
build_map_title_hint_adjacent_pairs(&embedded_map_references, &grounded_title_hits);
|
||||
let strongest_same_stem_pair = adjacent_reference_title_pairs
|
||||
.iter()
|
||||
.find(|pair| pair.normalized_stem_match)
|
||||
.cloned();
|
||||
|
||||
if grounded_title_hits.is_empty()
|
||||
&& embedded_map_references.is_empty()
|
||||
&& strongest_same_stem_pair.is_none()
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(SmpMapTitleHintProbe {
|
||||
source_kind: "grounded-title-string-scan".to_string(),
|
||||
profile_family: container_profile.map(|profile| profile.profile_family.clone()),
|
||||
grounded_title_hits,
|
||||
embedded_map_references,
|
||||
adjacent_reference_title_pairs,
|
||||
strongest_same_stem_pair,
|
||||
})
|
||||
}
|
||||
|
||||
fn build_map_title_hint_adjacent_pairs(
|
||||
map_references: &[SmpMapTitleHintMapReference],
|
||||
title_hits: &[SmpMapTitleHintTitleHit],
|
||||
) -> Vec<SmpMapTitleHintAdjacentPair> {
|
||||
let mut pairs = Vec::new();
|
||||
|
||||
for map_reference in map_references {
|
||||
let mut best_pair: Option<SmpMapTitleHintAdjacentPair> = None;
|
||||
for title_hit in title_hits {
|
||||
let byte_distance = map_reference.offset.abs_diff(title_hit.earliest_offset);
|
||||
if byte_distance > MAP_TITLE_HINT_REFERENCE_PAIR_DISTANCE_LIMIT {
|
||||
continue;
|
||||
}
|
||||
let candidate = SmpMapTitleHintAdjacentPair {
|
||||
map_reference_offset: map_reference.offset,
|
||||
map_reference_text: map_reference.text.clone(),
|
||||
title_offset: title_hit.earliest_offset,
|
||||
title: title_hit.title.clone(),
|
||||
byte_distance,
|
||||
normalized_stem_match: normalize_map_title_hint_stem(&map_reference.text)
|
||||
== normalize_map_title_hint_stem(&title_hit.title),
|
||||
};
|
||||
let replace = match &best_pair {
|
||||
Some(current) => {
|
||||
(candidate.normalized_stem_match && !current.normalized_stem_match)
|
||||
|| (candidate.normalized_stem_match == current.normalized_stem_match
|
||||
&& candidate.byte_distance < current.byte_distance)
|
||||
}
|
||||
None => true,
|
||||
};
|
||||
if replace {
|
||||
best_pair = Some(candidate);
|
||||
}
|
||||
}
|
||||
if let Some(pair) = best_pair {
|
||||
pairs.push(pair);
|
||||
}
|
||||
}
|
||||
|
||||
pairs.sort_by_key(|pair| {
|
||||
(
|
||||
!pair.normalized_stem_match,
|
||||
pair.byte_distance,
|
||||
pair.map_reference_offset,
|
||||
)
|
||||
});
|
||||
pairs
|
||||
}
|
||||
|
||||
/// Reduces a map reference or a title to a comparable stem.
///
/// Surrounding whitespace is trimmed, the text is ASCII-lowercased, and any
/// trailing `.gmp` extensions are removed. Lowercasing happens before the
/// extension is stripped so that every casing of the extension (`.gmp`,
/// `.GMP`, `.Gmp`, ...) is handled the same way.
fn normalize_map_title_hint_stem(text: &str) -> String {
    let mut stem = text.trim().to_ascii_lowercase();
    // Compute the stem length first, then shorten in place to avoid a second
    // allocation.
    let stem_len = stem.trim_end_matches(".gmp").len();
    stem.truncate(stem_len);
    stem
}
|
||||
|
||||
/// Returns the offset of the first occurrence of `needle` in `bytes`.
///
/// Yields `None` when `needle` is empty, longer than `bytes`, or absent.
fn find_first_subsequence_offset(bytes: &[u8], needle: &[u8]) -> Option<usize> {
    let needle_len = needle.len();
    if needle_len == 0 || needle_len > bytes.len() {
        return None;
    }

    let last_start = bytes.len() - needle_len;
    (0..=last_start).find(|&start| bytes[start..].starts_with(needle))
}
|
||||
|
||||
fn find_ascii_fragment_occurrences_with_suffix(bytes: &[u8], suffix: &str) -> Vec<(usize, String)> {
|
||||
let suffix_bytes = suffix.as_bytes();
|
||||
if suffix_bytes.is_empty() || bytes.len() < suffix_bytes.len() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let mut occurrences = Vec::new();
|
||||
let mut seen_offsets = BTreeSet::new();
|
||||
for offset in 0..=bytes.len() - suffix_bytes.len() {
|
||||
if &bytes[offset..offset + suffix_bytes.len()] != suffix_bytes {
|
||||
continue;
|
||||
}
|
||||
if let Some((start, text)) = extract_ascii_fragment_containing(bytes, offset) {
|
||||
if seen_offsets.insert(start) {
|
||||
occurrences.push((start, text));
|
||||
}
|
||||
}
|
||||
}
|
||||
occurrences
|
||||
}
|
||||
|
||||
fn extract_ascii_fragment_containing(bytes: &[u8], offset: usize) -> Option<(usize, String)> {
|
||||
if offset >= bytes.len() || !is_map_title_hint_ascii_fragment_byte(bytes[offset]) {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut start = offset;
|
||||
while start > 0 && is_map_title_hint_ascii_fragment_byte(bytes[start - 1]) {
|
||||
start -= 1;
|
||||
}
|
||||
|
||||
let mut end = offset;
|
||||
while end < bytes.len() && is_map_title_hint_ascii_fragment_byte(bytes[end]) {
|
||||
end += 1;
|
||||
}
|
||||
|
||||
if end <= start {
|
||||
return None;
|
||||
}
|
||||
let len = (end - start).min(MAP_TITLE_HINT_ASCII_FRAGMENT_MAX_LEN);
|
||||
let text = String::from_utf8_lossy(&bytes[start..start + len])
|
||||
.trim()
|
||||
.to_string();
|
||||
if text.is_empty() {
|
||||
return None;
|
||||
}
|
||||
Some((start, text))
|
||||
}
|
||||
|
||||
/// Reports whether `byte` may be part of a map title hint ASCII fragment.
///
/// Accepted bytes are the ASCII letters and digits plus space, `!`, `-`,
/// `.`, `/`, `\`, `:` and `_`.
fn is_map_title_hint_ascii_fragment_byte(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || b" !-./\\:_".contains(&byte)
}
|
||||
|
||||
fn build_ascii_preview(bytes: &[u8], start: usize, end: usize) -> SmpAsciiPreview {
|
||||
let byte_len = end - start;
|
||||
let preview_bytes = &bytes[start..end];
|
||||
|
|
@ -29834,6 +30094,56 @@ mod tests {
|
|||
assert_eq!(probe.footer_progress_word_1, 0x3714);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parses_map_title_hint_probe_from_grounded_titles_and_embedded_map_reference() {
|
||||
let mut bytes = vec![0u8; 0x9000];
|
||||
let embedded_reference = b"Dutchlantis.gmp";
|
||||
let title = b"Dutchlantis";
|
||||
let later_title = b"Germany";
|
||||
|
||||
bytes[0x73d0..0x73d0 + embedded_reference.len()].copy_from_slice(embedded_reference);
|
||||
bytes[0x73e0..0x73e0 + title.len()].copy_from_slice(title);
|
||||
bytes[0x8400..0x8400 + later_title.len()].copy_from_slice(later_title);
|
||||
|
||||
let probe = parse_map_title_hint_probe(
|
||||
&bytes,
|
||||
Some("gmp"),
|
||||
Some(&SmpContainerProfile {
|
||||
profile_family: "rt3-105-map-container-v1".to_string(),
|
||||
profile_evidence: vec![],
|
||||
is_known_profile: true,
|
||||
}),
|
||||
)
|
||||
.expect("map title hint probe should parse");
|
||||
|
||||
assert_eq!(probe.source_kind, "grounded-title-string-scan");
|
||||
assert_eq!(
|
||||
probe.profile_family,
|
||||
Some("rt3-105-map-container-v1".to_string())
|
||||
);
|
||||
assert_eq!(probe.grounded_title_hits.len(), 2);
|
||||
assert_eq!(probe.grounded_title_hits[0].title, "Germany");
|
||||
assert_eq!(probe.grounded_title_hits[0].earliest_offset, 0x8400);
|
||||
assert_eq!(probe.grounded_title_hits[1].title, "Dutchlantis");
|
||||
assert_eq!(probe.grounded_title_hits[1].earliest_offset, 0x73d0);
|
||||
assert_eq!(probe.embedded_map_references.len(), 1);
|
||||
assert_eq!(probe.embedded_map_references[0].offset, 0x73d0);
|
||||
assert_eq!(probe.embedded_map_references[0].text, "Dutchlantis.gmp");
|
||||
assert_eq!(probe.adjacent_reference_title_pairs.len(), 1);
|
||||
assert_eq!(
|
||||
probe
|
||||
.strongest_same_stem_pair
|
||||
.as_ref()
|
||||
.map(|pair| pair.title.as_str()),
|
||||
Some("Dutchlantis")
|
||||
);
|
||||
let pair = probe.strongest_same_stem_pair.expect("same-stem pair");
|
||||
assert_eq!(pair.map_reference_offset, 0x73d0);
|
||||
assert_eq!(pair.title_offset, 0x73d0);
|
||||
assert!(pair.normalized_stem_match);
|
||||
assert_eq!(pair.byte_distance, 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parses_rt3_105_save_named_locomotive_availability_probe() {
|
||||
let mut bytes = vec![0u8; 0x9000];
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue