From b9054b178034c5cdee8016349bebf5208adea2df Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Sun, 19 Apr 2026 10:53:49 -0700 Subject: [PATCH] Probe map title hints for kind-8 carriers --- .../add-building-map-title-hints.json | 129 ++++++++ ...runtime-effect-kind8-title-overlap-note.md | 86 +++++ crates/rrt-cli/src/main.rs | 103 +++++- crates/rrt-runtime/src/lib.rs | 6 +- crates/rrt-runtime/src/smp.rs | 310 ++++++++++++++++++ docs/rehost-queue.md | 39 ++- 6 files changed, 655 insertions(+), 18 deletions(-) create mode 100644 artifacts/exports/rt3-1.05/add-building-map-title-hints.json create mode 100644 artifacts/exports/rt3-1.06/runtime-effect-kind8-title-overlap-note.md diff --git a/artifacts/exports/rt3-1.05/add-building-map-title-hints.json b/artifacts/exports/rt3-1.05/add-building-map-title-hints.json new file mode 100644 index 0000000..e0bd309 --- /dev/null +++ b/artifacts/exports/rt3-1.05/add-building-map-title-hints.json @@ -0,0 +1,129 @@ +{ + "root_path": "/tmp/rrt-add-building-carrier-maps", + "report": { + "maps_scanned": 6, + "maps_with_probe": 5, + "maps_with_grounded_title_hits": 5, + "maps_with_adjacent_title_pairs": 1, + "maps_with_same_stem_adjacent_pairs": 1, + "maps": [ + { + "path": "/tmp/rrt-add-building-carrier-maps/Alternate USA.gmp", + "probe": { + "source_kind": "grounded-title-string-scan", + "profile_family": "rt3-105-map-container-v1", + "grounded_title_hits": [ + { + "title": "Germany", + "earliest_offset": 22150015 + }, + { + "title": "France", + "earliest_offset": 21969932 + }, + { + "title": "Britain", + "earliest_offset": 22150089 + } + ], + "embedded_map_references": [], + "adjacent_reference_title_pairs": [], + "strongest_same_stem_pair": null + } + }, + { + "path": "/tmp/rrt-add-building-carrier-maps/Chicago to New York.gmp", + "probe": { + "source_kind": "grounded-title-string-scan", + "profile_family": "unknown", + "grounded_title_hits": [ + { + "title": "Germany", + "earliest_offset": 11444199 + }, + { + "title": 
"France", + "earliest_offset": 11444215 + } + ], + "embedded_map_references": [], + "adjacent_reference_title_pairs": [], + "strongest_same_stem_pair": null + } + }, + { + "path": "/tmp/rrt-add-building-carrier-maps/Louisiana.gmp", + "probe": { + "source_kind": "grounded-title-string-scan", + "profile_family": "unknown", + "grounded_title_hits": [ + { + "title": "Germany", + "earliest_offset": 9165940 + }, + { + "title": "Dutchlantis", + "earliest_offset": 29648 + } + ], + "embedded_map_references": [ + { + "offset": 29648, + "text": "Dutchlantis.gmp" + } + ], + "adjacent_reference_title_pairs": [ + { + "map_reference_offset": 29648, + "map_reference_text": "Dutchlantis.gmp", + "title_offset": 29648, + "title": "Dutchlantis", + "byte_distance": 0, + "normalized_stem_match": true + } + ], + "strongest_same_stem_pair": { + "map_reference_offset": 29648, + "map_reference_text": "Dutchlantis.gmp", + "title_offset": 29648, + "title": "Dutchlantis", + "byte_distance": 0, + "normalized_stem_match": true + } + } + }, + { + "path": "/tmp/rrt-add-building-carrier-maps/Pacific Coastal.gmp", + "probe": { + "source_kind": "grounded-title-string-scan", + "profile_family": "unknown", + "grounded_title_hits": [ + { + "title": "Central Pacific", + "earliest_offset": 7854281 + } + ], + "embedded_map_references": [], + "adjacent_reference_title_pairs": [], + "strongest_same_stem_pair": null + } + }, + { + "path": "/tmp/rrt-add-building-carrier-maps/Texas Tea.gmp", + "probe": { + "source_kind": "grounded-title-string-scan", + "profile_family": "unknown", + "grounded_title_hits": [ + { + "title": "Germany", + "earliest_offset": 9985405 + } + ], + "embedded_map_references": [], + "adjacent_reference_title_pairs": [], + "strongest_same_stem_pair": null + } + } + ] + } +} diff --git a/artifacts/exports/rt3-1.06/runtime-effect-kind8-title-overlap-note.md b/artifacts/exports/rt3-1.06/runtime-effect-kind8-title-overlap-note.md new file mode 100644 index 0000000..d149dd3 --- /dev/null +++ 
b/artifacts/exports/rt3-1.06/runtime-effect-kind8-title-overlap-note.md @@ -0,0 +1,86 @@ +# Runtime Effect Kind-8 Title Overlap Note + +This note tightens the current `0x00442c30` title-fixup hypothesis for the shipped add-building +carrier maps. + +## Grounded title-hint scan + +The checked report +`artifacts/exports/rt3-1.05/add-building-map-title-hints.json` +scans the six bundled add-building carrier maps: + +- `Alternate USA.gmp` +- `Chicago to New York.gmp` +- `Louisiana.gmp` +- `Pacific Coastal.gmp` +- `Rhodes Unfinished.gmp` +- `Texas Tea.gmp` + +against the currently grounded `0x00442c30` title set: + +- `Go West!` +- `Germany` +- `France` +- `State of Germany` +- `New Beginnings` +- `Dutchlantis` +- `Britain` +- `New Zealand` +- `South East Australia` +- `Tex-Mex` +- `Germantown` +- `The American` +- `Central Pacific` +- `Orient Express` + +Observed result: + +- `5 / 6` carrier maps show at least one grounded title hit. +- `1 / 6` carrier maps show an adjacent embedded `.gmp` reference plus grounded title. +- `1 / 6` carrier maps show a same-stem pair. + +The only strong same-stem overlap is: + +- `Louisiana.gmp` + - embedded map reference: `Dutchlantis.gmp` at `0x73d0` + - grounded title hit: `Dutchlantis` at `0x73d0` + - byte distance: `0` + +All other current carrier-map overlaps are weaker late-string hits: + +- `Alternate USA.gmp`: `Germany`, `France`, `Britain` +- `Chicago to New York.gmp`: `Germany`, `France` +- `Pacific Coastal.gmp`: `Central Pacific` +- `Texas Tea.gmp`: `Germany` +- `Rhodes Unfinished.gmp`: no current grounded hit + +## Louisiana versus Dutchlantis runtime-event comparison + +Direct `inspect-smp` comparison shows the strong same-stem title overlap does **not** carry over +to the actual add-building dispatch strip. 
+ +`Louisiana.gmp`: + +- `nondirect_compact_record_count = 14` +- add-building dispatch record index: `5` +- add-building label: `Add Building Warehouse05` +- add-building signature family: + `nondirect-ge1e-h0001-0007-0000-5200-0200-p0000-0000-0000-ffff` +- add-building condition tuple family: `[7:0]` +- add-building cluster: + `nondirect-ge1e-h0001-0007-0000-5200-0200-p0000-0000-0000-ffff :: [7:0]` + +`Dutchlantis.gmp`: + +- `nondirect_compact_record_count = 15` +- no add-building dispatch rows +- current dispatch-strip grouped descriptor labels: + `Company Variable 1`, `Company Variable 2`, `Company Variable 3` + +## Current implication + +The title-fixup branch remains possible for narrow scenario-specific retagging, but the strongest +current filename/title overlap (`Louisiana -> Dutchlantis`) does not reproduce the actual +add-building dispatch cluster, and its zero byte distance means the `Dutchlantis` title hit is the stem of the embedded `Dutchlantis.gmp` reference string itself rather than an independent title string. That weakens `0x00442c30` as the primary explanation for the +shipped add-building carrier strip and keeps the stronger bias on the non-title-specific late +bringup owners between ordinary reload `0x00433130` and final kind-`8` service `0x00432f40`.
diff --git a/crates/rrt-cli/src/main.rs b/crates/rrt-cli/src/main.rs index a768cfe..bac1f98 100644 --- a/crates/rrt-cli/src/main.rs +++ b/crates/rrt-cli/src/main.rs @@ -24,12 +24,13 @@ use rrt_runtime::{ RuntimeOverlayImportDocument, RuntimeOverlayImportDocumentSource, RuntimeSaveSliceDocument, RuntimeSaveSliceDocumentSource, RuntimeSnapshotDocument, RuntimeSnapshotSource, RuntimeSummary, SAVE_SLICE_DOCUMENT_FORMAT_VERSION, SNAPSHOT_FORMAT_VERSION, SmpClassicPackedProfileBlock, - SmpInspectionReport, SmpLoadedSaveSlice, SmpRt3105PackedProfileBlock, SmpSaveLoadSummary, - WinInspectionReport, compare_save_region_fixed_row_run_candidates, execute_step_command, - extract_pk4_entry_file, inspect_building_types_dir_with_bindings, inspect_campaign_exe_file, - inspect_cargo_economy_sources_with_bindings, inspect_cargo_skin_pk4, inspect_cargo_types_dir, - inspect_pk4_file, inspect_save_company_and_chairman_analysis_file, - inspect_save_infrastructure_asset_trace_file, inspect_save_periodic_company_service_trace_file, + SmpInspectionReport, SmpLoadedSaveSlice, SmpMapTitleHintProbe, SmpRt3105PackedProfileBlock, + SmpSaveLoadSummary, WinInspectionReport, compare_save_region_fixed_row_run_candidates, + execute_step_command, extract_pk4_entry_file, inspect_building_types_dir_with_bindings, + inspect_campaign_exe_file, inspect_cargo_economy_sources_with_bindings, inspect_cargo_skin_pk4, + inspect_cargo_types_dir, inspect_map_title_hint_file, inspect_pk4_file, + inspect_save_company_and_chairman_analysis_file, inspect_save_infrastructure_asset_trace_file, + inspect_save_periodic_company_service_trace_file, inspect_save_placed_structure_dynamic_side_buffer_file, inspect_save_region_queued_notice_records_file, inspect_save_region_service_trace_file, inspect_smp_file, inspect_unclassified_save_collection_headers_file, inspect_win_file, @@ -135,6 +136,9 @@ enum Command { RuntimeInspectCompactEventDispatchClusterCounts { root_path: PathBuf, }, + RuntimeInspectMapTitleHints { + 
root_path: PathBuf, + }, RuntimeSummarizeSaveLoad { smp_path: PathBuf, }, @@ -319,6 +323,28 @@ struct RuntimeCompactEventDispatchClusterCountsOutput { report: RuntimeCompactEventDispatchClusterCountsReport, } +#[derive(Debug, Serialize)] +struct RuntimeMapTitleHintDirectoryOutput { + root_path: String, + report: RuntimeMapTitleHintDirectoryReport, +} + +#[derive(Debug, Serialize)] +struct RuntimeMapTitleHintDirectoryReport { + maps_scanned: usize, + maps_with_probe: usize, + maps_with_grounded_title_hits: usize, + maps_with_adjacent_title_pairs: usize, + maps_with_same_stem_adjacent_pairs: usize, + maps: Vec, +} + +#[derive(Debug, Serialize)] +struct RuntimeMapTitleHintMapEntry { + path: String, + probe: SmpMapTitleHintProbe, +} + #[derive(Debug, Serialize)] struct RuntimeCompactEventDispatchClusterReport { maps_scanned: usize, @@ -1061,6 +1087,9 @@ fn real_main() -> Result<(), Box> { Command::RuntimeInspectCompactEventDispatchClusterCounts { root_path } => { run_runtime_inspect_compact_event_dispatch_cluster_counts(&root_path)?; } + Command::RuntimeInspectMapTitleHints { root_path } => { + run_runtime_inspect_map_title_hints(&root_path)?; + } Command::RuntimeSummarizeSaveLoad { smp_path } => { run_runtime_summarize_save_load(&smp_path)?; } @@ -1311,6 +1340,13 @@ fn parse_command() -> Result> { root_path: PathBuf::from(root_path), }) } + [command, subcommand, root_path] + if command == "runtime" && subcommand == "inspect-map-title-hints" => + { + Ok(Command::RuntimeInspectMapTitleHints { + root_path: PathBuf::from(root_path), + }) + } [command, subcommand, path] if command == "runtime" && subcommand == "summarize-save-load" => { @@ -1611,7 +1647,7 @@ fn parse_command() -> Result> { }) } _ => Err( - "usage: rrt-cli [validate [repo-root] | finance eval | finance diff | runtime validate-fixture | runtime summarize-fixture | runtime export-fixture-state | runtime diff-state | runtime summarize-state | runtime import-state | runtime inspect-smp | runtime 
inspect-candidate-table | runtime inspect-compact-event-dispatch-cluster | runtime inspect-compact-event-dispatch-cluster-counts | runtime summarize-save-load | runtime load-save-slice | runtime inspect-save-company-chairman | runtime inspect-save-placed-structure-triplets | runtime compare-region-fixed-row-runs | runtime inspect-periodic-company-service-trace | runtime inspect-region-service-trace | runtime inspect-infrastructure-asset-trace | runtime inspect-save-region-queued-notice-records | runtime inspect-placed-structure-dynamic-side-buffer | runtime inspect-unclassified-save-collections | runtime import-save-state | runtime export-save-slice | runtime export-overlay-import | runtime inspect-pk4 | runtime inspect-cargo-types | runtime inspect-building-type-sources [building-bindings.json] | runtime inspect-cargo-skins | runtime inspect-cargo-economy-sources | runtime inspect-cargo-production-selector | runtime inspect-cargo-price-selector | runtime inspect-win | runtime extract-pk4-entry | runtime inspect-campaign-exe | runtime compare-classic-profile [saveN.gms...] | runtime compare-105-profile [saveN.gms...] | runtime compare-candidate-table [fileN...] | runtime compare-recipe-book-lines [fileN...] | runtime compare-setup-payload-core [fileN...] | runtime compare-setup-launch-payload [fileN...] | runtime compare-post-special-conditions-scalars [fileN...] 
| runtime scan-candidate-table-headers | runtime scan-special-conditions | runtime scan-aligned-runtime-rule-band | runtime scan-post-special-conditions-scalars | runtime scan-post-special-conditions-tail | runtime scan-recipe-book-lines | runtime export-profile-block ]" + "usage: rrt-cli [validate [repo-root] | finance eval | finance diff | runtime validate-fixture | runtime summarize-fixture | runtime export-fixture-state | runtime diff-state | runtime summarize-state | runtime import-state | runtime inspect-smp | runtime inspect-candidate-table | runtime inspect-compact-event-dispatch-cluster | runtime inspect-compact-event-dispatch-cluster-counts | runtime inspect-map-title-hints | runtime summarize-save-load | runtime load-save-slice | runtime inspect-save-company-chairman | runtime inspect-save-placed-structure-triplets | runtime compare-region-fixed-row-runs | runtime inspect-periodic-company-service-trace | runtime inspect-region-service-trace | runtime inspect-infrastructure-asset-trace | runtime inspect-save-region-queued-notice-records | runtime inspect-placed-structure-dynamic-side-buffer | runtime inspect-unclassified-save-collections | runtime import-save-state | runtime export-save-slice | runtime export-overlay-import | runtime inspect-pk4 | runtime inspect-cargo-types | runtime inspect-building-type-sources [building-bindings.json] | runtime inspect-cargo-skins | runtime inspect-cargo-economy-sources | runtime inspect-cargo-production-selector | runtime inspect-cargo-price-selector | runtime inspect-win | runtime extract-pk4-entry | runtime inspect-campaign-exe | runtime compare-classic-profile [saveN.gms...] | runtime compare-105-profile [saveN.gms...] | runtime compare-candidate-table [fileN...] | runtime compare-recipe-book-lines [fileN...] | runtime compare-setup-payload-core [fileN...] | runtime compare-setup-launch-payload [fileN...] | runtime compare-post-special-conditions-scalars [fileN...] 
| runtime scan-candidate-table-headers | runtime scan-special-conditions | runtime scan-aligned-runtime-rule-band | runtime scan-post-special-conditions-scalars | runtime scan-post-special-conditions-tail | runtime scan-recipe-book-lines | runtime export-profile-block ]" .into(), ), } @@ -1816,6 +1852,59 @@ fn run_runtime_inspect_smp(smp_path: &Path) -> Result<(), Box Result<(), Box> { + let mut maps = Vec::new(); + let mut maps_scanned = 0usize; + let mut maps_with_probe = 0usize; + let mut maps_with_grounded_title_hits = 0usize; + let mut maps_with_adjacent_title_pairs = 0usize; + let mut maps_with_same_stem_adjacent_pairs = 0usize; + + let mut paths = fs::read_dir(root_path)? + .filter_map(|entry| entry.ok().map(|entry| entry.path())) + .filter(|path| { + path.extension() + .and_then(|extension| extension.to_str()) + .is_some_and(|extension| extension.eq_ignore_ascii_case("gmp")) + }) + .collect::>(); + paths.sort(); + + for path in paths { + maps_scanned += 1; + if let Some(probe) = inspect_map_title_hint_file(&path)? 
{ + maps_with_probe += 1; + if !probe.grounded_title_hits.is_empty() { + maps_with_grounded_title_hits += 1; + } + if !probe.adjacent_reference_title_pairs.is_empty() { + maps_with_adjacent_title_pairs += 1; + } + if probe.strongest_same_stem_pair.is_some() { + maps_with_same_stem_adjacent_pairs += 1; + } + maps.push(RuntimeMapTitleHintMapEntry { + path: path.display().to_string(), + probe, + }); + } + } + + let output = RuntimeMapTitleHintDirectoryOutput { + root_path: root_path.display().to_string(), + report: RuntimeMapTitleHintDirectoryReport { + maps_scanned, + maps_with_probe, + maps_with_grounded_title_hits, + maps_with_adjacent_title_pairs, + maps_with_same_stem_adjacent_pairs, + maps, + }, + }; + println!("{}", serde_json::to_string_pretty(&output)?); + Ok(()) +} + fn run_runtime_inspect_compact_event_dispatch_cluster( root_path: &Path, ) -> Result<(), Box> { diff --git a/crates/rrt-runtime/src/lib.rs b/crates/rrt-runtime/src/lib.rs index 91ad112..a7081a5 100644 --- a/crates/rrt-runtime/src/lib.rs +++ b/crates/rrt-runtime/src/lib.rs @@ -117,7 +117,8 @@ pub use smp::{ SmpLoadedWorldEconomicTuningState, SmpLoadedWorldFinanceNeighborhoodState, SmpLoadedWorldIssue37State, SmpLocomotivePolicyFieldObservation, SmpLocomotivePolicyFloatAlignmentCandidate, SmpLocomotivePolicyNeighborhoodProbe, - SmpPackedProfileWordLane, SmpPeriodicCompanyServiceTraceReport, + SmpMapTitleHintAdjacentPair, SmpMapTitleHintMapReference, SmpMapTitleHintProbe, + SmpMapTitleHintTitleHit, SmpPackedProfileWordLane, SmpPeriodicCompanyServiceTraceReport, SmpPostSpecialConditionsScalarLane, SmpPostSpecialConditionsScalarProbe, SmpPostTextFieldNeighborhoodProbe, SmpPostTextFloatAlignmentCandidate, SmpPostTextGroundedFieldObservation, SmpPreRecipeScalarPlateauLane, @@ -139,7 +140,8 @@ pub use smp::{ SmpSaveWorldSelectionRoleAnalysis, SmpSaveWorldSelectionRoleAnalysisEntry, SmpSecondaryVariantProbe, SmpServiceTraceBranchStatus, SmpSharedHeader, SmpSpecialConditionEntry, 
SmpSpecialConditionsProbe, - compare_save_region_fixed_row_run_candidates, inspect_save_company_and_chairman_analysis_bytes, + compare_save_region_fixed_row_run_candidates, inspect_map_title_hint_bytes, + inspect_map_title_hint_file, inspect_save_company_and_chairman_analysis_bytes, inspect_save_company_and_chairman_analysis_file, inspect_save_infrastructure_asset_trace_file, inspect_save_periodic_company_service_trace_file, inspect_save_placed_structure_dynamic_side_buffer_file, diff --git a/crates/rrt-runtime/src/smp.rs b/crates/rrt-runtime/src/smp.rs index 5e71835..8740b20 100644 --- a/crates/rrt-runtime/src/smp.rs +++ b/crates/rrt-runtime/src/smp.rs @@ -175,6 +175,24 @@ const PACKED_EVENT_REAL_COMPACT_CONTROL_LEN: usize = 37; const PACKED_EVENT_NONDIRECT_CONDITION_ROW_SERIALIZED_LEN: usize = 22; const PACKED_EVENT_NONDIRECT_GROUPED_EFFECT_ROW_SERIALIZED_LEN: usize = 45; const PACKED_EVENT_NONDIRECT_OPTIONAL_NAME_BLOCK_LEN: usize = 0x64; +const MAP_TITLE_HINT_ASCII_FRAGMENT_MAX_LEN: usize = 160; +const MAP_TITLE_HINT_REFERENCE_PAIR_DISTANCE_LIMIT: usize = 0x100; +const POST_LOAD_SCENARIO_FIXUP_TITLE_SET: [&str; 14] = [ + "Go West!", + "Germany", + "France", + "State of Germany", + "New Beginnings", + "Dutchlantis", + "Britain", + "New Zealand", + "South East Australia", + "Tex-Mex", + "Germantown", + "The American", + "Central Pacific", + "Orient Express", +]; const PACKED_EVENT_TEXT_BAND_LABELS: [&str; 6] = [ "primary_text_band", "secondary_text_band_0", @@ -1306,6 +1324,38 @@ pub struct SmpAsciiPreview { pub truncated: bool, } +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct SmpMapTitleHintTitleHit { + pub title: String, + pub earliest_offset: usize, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct SmpMapTitleHintMapReference { + pub offset: usize, + pub text: String, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct SmpMapTitleHintAdjacentPair { + pub 
map_reference_offset: usize, + pub map_reference_text: String, + pub title_offset: usize, + pub title: String, + pub byte_distance: usize, + pub normalized_stem_match: bool, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct SmpMapTitleHintProbe { + pub source_kind: String, + pub profile_family: Option, + pub grounded_title_hits: Vec, + pub embedded_map_references: Vec, + pub adjacent_reference_title_pairs: Vec, + pub strongest_same_stem_pair: Option, +} + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct SmpSharedHeader { pub byte_len: usize, @@ -4249,6 +4299,8 @@ pub struct SmpInspectionReport { pub save_company_roster_probe: Option, #[serde(default)] pub save_chairman_profile_table_probe: Option, + #[serde(default)] + pub map_title_hint_probe: Option, pub rt3_105_save_name_table_probe: Option, pub rt3_105_save_named_locomotive_availability_probe: Option, @@ -4279,6 +4331,41 @@ pub fn inspect_smp_file(path: &Path) -> Result Result, Box> { + let bytes = fs::read(path)?; + let file_extension_hint = path + .extension() + .and_then(|extension| extension.to_str()) + .map(|extension| extension.to_ascii_lowercase()); + Ok(inspect_map_title_hint_bytes( + &bytes, + file_extension_hint.as_deref(), + )) +} + +pub fn inspect_map_title_hint_bytes( + bytes: &[u8], + file_extension_hint: Option<&str>, +) -> Option { + let shared_header = parse_shared_header(bytes); + let header_variant_probe = shared_header.as_ref().map(classify_header_variant_probe); + let first_ascii_run = find_first_ascii_run(bytes); + let early_content_probe = first_ascii_run + .as_ref() + .and_then(|ascii_run| probe_early_content_layout(bytes, ascii_run)); + let secondary_variant_probe = early_content_probe + .as_ref() + .and_then(classify_secondary_variant_probe); + let container_profile = classify_container_profile( + file_extension_hint, + header_variant_probe.as_ref(), + secondary_variant_probe.as_ref(), + ); + parse_map_title_hint_probe(bytes, 
file_extension_hint, container_profile.as_ref()) +} + pub fn inspect_unclassified_save_collection_headers_file( path: &Path, ) -> Result, Box> { @@ -13447,6 +13534,11 @@ fn inspect_bundle_bytes(bytes: &[u8], file_extension_hint: Option) -> Sm save_world_selection_context_probe.as_ref(), save_company_collection_header_probe.as_ref(), ); + let map_title_hint_probe = parse_map_title_hint_probe( + bytes, + file_extension_hint.as_deref(), + container_profile.as_ref(), + ); let rt3_105_save_name_table_probe = parse_rt3_105_save_name_table_probe( bytes, file_extension_hint.as_deref(), @@ -13611,6 +13703,7 @@ fn inspect_bundle_bytes(bytes: &[u8], file_extension_hint: Option) -> Sm save_unclassified_tagged_collection_header_probes, save_company_roster_probe, save_chairman_profile_table_probe, + map_title_hint_probe, rt3_105_save_name_table_probe, rt3_105_save_named_locomotive_availability_probe, special_conditions_probe, @@ -21358,6 +21451,173 @@ fn find_first_ascii_run(bytes: &[u8]) -> Option { }) } +fn parse_map_title_hint_probe( + bytes: &[u8], + file_extension_hint: Option<&str>, + container_profile: Option<&SmpContainerProfile>, +) -> Option { + if file_extension_hint != Some("gmp") { + return None; + } + + let grounded_title_hits = POST_LOAD_SCENARIO_FIXUP_TITLE_SET + .iter() + .filter_map(|title| { + let offset = find_first_subsequence_offset(bytes, title.as_bytes())?; + Some(SmpMapTitleHintTitleHit { + title: (*title).to_string(), + earliest_offset: offset, + }) + }) + .collect::>(); + + let embedded_map_references = find_ascii_fragment_occurrences_with_suffix(bytes, ".gmp") + .into_iter() + .map(|(offset, text)| SmpMapTitleHintMapReference { offset, text }) + .collect::>(); + + let adjacent_reference_title_pairs = + build_map_title_hint_adjacent_pairs(&embedded_map_references, &grounded_title_hits); + let strongest_same_stem_pair = adjacent_reference_title_pairs + .iter() + .find(|pair| pair.normalized_stem_match) + .cloned(); + + if grounded_title_hits.is_empty() 
+ && embedded_map_references.is_empty() + && strongest_same_stem_pair.is_none() + { + return None; + } + + Some(SmpMapTitleHintProbe { + source_kind: "grounded-title-string-scan".to_string(), + profile_family: container_profile.map(|profile| profile.profile_family.clone()), + grounded_title_hits, + embedded_map_references, + adjacent_reference_title_pairs, + strongest_same_stem_pair, + }) +} + +fn build_map_title_hint_adjacent_pairs( + map_references: &[SmpMapTitleHintMapReference], + title_hits: &[SmpMapTitleHintTitleHit], +) -> Vec { + let mut pairs = Vec::new(); + + for map_reference in map_references { + let mut best_pair: Option = None; + for title_hit in title_hits { + let byte_distance = map_reference.offset.abs_diff(title_hit.earliest_offset); + if byte_distance > MAP_TITLE_HINT_REFERENCE_PAIR_DISTANCE_LIMIT { + continue; + } + let candidate = SmpMapTitleHintAdjacentPair { + map_reference_offset: map_reference.offset, + map_reference_text: map_reference.text.clone(), + title_offset: title_hit.earliest_offset, + title: title_hit.title.clone(), + byte_distance, + normalized_stem_match: normalize_map_title_hint_stem(&map_reference.text) + == normalize_map_title_hint_stem(&title_hit.title), + }; + let replace = match &best_pair { + Some(current) => { + (candidate.normalized_stem_match && !current.normalized_stem_match) + || (candidate.normalized_stem_match == current.normalized_stem_match + && candidate.byte_distance < current.byte_distance) + } + None => true, + }; + if replace { + best_pair = Some(candidate); + } + } + if let Some(pair) = best_pair { + pairs.push(pair); + } + } + + pairs.sort_by_key(|pair| { + ( + !pair.normalized_stem_match, + pair.byte_distance, + pair.map_reference_offset, + ) + }); + pairs +} + +fn normalize_map_title_hint_stem(text: &str) -> String { + text.trim() + .trim_end_matches(".gmp") + .trim_end_matches(".GMP") + .to_ascii_lowercase() +} + +fn find_first_subsequence_offset(bytes: &[u8], needle: &[u8]) -> Option { + if 
needle.is_empty() || bytes.len() < needle.len() { + return None; + } + bytes + .windows(needle.len()) + .position(|window| window == needle) +} + +fn find_ascii_fragment_occurrences_with_suffix(bytes: &[u8], suffix: &str) -> Vec<(usize, String)> { + let suffix_bytes = suffix.as_bytes(); + if suffix_bytes.is_empty() || bytes.len() < suffix_bytes.len() { + return Vec::new(); + } + + let mut occurrences = Vec::new(); + let mut seen_offsets = BTreeSet::new(); + for offset in 0..=bytes.len() - suffix_bytes.len() { + if &bytes[offset..offset + suffix_bytes.len()] != suffix_bytes { + continue; + } + if let Some((start, text)) = extract_ascii_fragment_containing(bytes, offset) { + if seen_offsets.insert(start) { + occurrences.push((start, text)); + } + } + } + occurrences +} + +fn extract_ascii_fragment_containing(bytes: &[u8], offset: usize) -> Option<(usize, String)> { + if offset >= bytes.len() || !is_map_title_hint_ascii_fragment_byte(bytes[offset]) { + return None; + } + + let mut start = offset; + while start > 0 && is_map_title_hint_ascii_fragment_byte(bytes[start - 1]) { + start -= 1; + } + + let mut end = offset; + while end < bytes.len() && is_map_title_hint_ascii_fragment_byte(bytes[end]) { + end += 1; + } + + if end <= start { + return None; + } + let len = (end - start).min(MAP_TITLE_HINT_ASCII_FRAGMENT_MAX_LEN); + let text = String::from_utf8_lossy(&bytes[start..start + len]) + .trim() + .to_string(); + if text.is_empty() { + return None; + } + Some((start, text)) +} + +fn is_map_title_hint_ascii_fragment_byte(byte: u8) -> bool { + matches!(byte, b' ' | b'!' | b'-' | b'.' 
| b'/' | b'\\' | b':' | b'_' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') +} + fn build_ascii_preview(bytes: &[u8], start: usize, end: usize) -> SmpAsciiPreview { let byte_len = end - start; let preview_bytes = &bytes[start..end]; @@ -29834,6 +30094,56 @@ mod tests { assert_eq!(probe.footer_progress_word_1, 0x3714); } + #[test] + fn parses_map_title_hint_probe_from_grounded_titles_and_embedded_map_reference() { + let mut bytes = vec![0u8; 0x9000]; + let embedded_reference = b"Dutchlantis.gmp"; + let title = b"Dutchlantis"; + let later_title = b"Germany"; + + bytes[0x73d0..0x73d0 + embedded_reference.len()].copy_from_slice(embedded_reference); + bytes[0x73e0..0x73e0 + title.len()].copy_from_slice(title); + bytes[0x8400..0x8400 + later_title.len()].copy_from_slice(later_title); + + let probe = parse_map_title_hint_probe( + &bytes, + Some("gmp"), + Some(&SmpContainerProfile { + profile_family: "rt3-105-map-container-v1".to_string(), + profile_evidence: vec![], + is_known_profile: true, + }), + ) + .expect("map title hint probe should parse"); + + assert_eq!(probe.source_kind, "grounded-title-string-scan"); + assert_eq!( + probe.profile_family, + Some("rt3-105-map-container-v1".to_string()) + ); + assert_eq!(probe.grounded_title_hits.len(), 2); + assert_eq!(probe.grounded_title_hits[0].title, "Germany"); + assert_eq!(probe.grounded_title_hits[0].earliest_offset, 0x8400); + assert_eq!(probe.grounded_title_hits[1].title, "Dutchlantis"); + assert_eq!(probe.grounded_title_hits[1].earliest_offset, 0x73d0); + assert_eq!(probe.embedded_map_references.len(), 1); + assert_eq!(probe.embedded_map_references[0].offset, 0x73d0); + assert_eq!(probe.embedded_map_references[0].text, "Dutchlantis.gmp"); + assert_eq!(probe.adjacent_reference_title_pairs.len(), 1); + assert_eq!( + probe + .strongest_same_stem_pair + .as_ref() + .map(|pair| pair.title.as_str()), + Some("Dutchlantis") + ); + let pair = probe.strongest_same_stem_pair.expect("same-stem pair"); + 
assert_eq!(pair.map_reference_offset, 0x73d0); + assert_eq!(pair.title_offset, 0x73d0); + assert!(pair.normalized_stem_match); + assert_eq!(pair.byte_distance, 0); + } + #[test] fn parses_rt3_105_save_named_locomotive_availability_probe() { let mut bytes = vec![0u8; 0x9000]; diff --git a/docs/rehost-queue.md b/docs/rehost-queue.md index 015061c..682d38e 100644 --- a/docs/rehost-queue.md +++ b/docs/rehost-queue.md @@ -435,16 +435,37 @@ Working rule: late-bringup facts for `0x00446d40`, `0x00443a50`, `0x00442c30`, and the explicit `SP - GOLD` / `Labor` trigger-kind rewrites, so the next pass can work from one bounded note instead of stitching the same ordering back together from the queue plus function-map prose. - - the shipped add-building carrier corpus weakens the current retagger hypothesis too: - the six bundled-map titles in + - the shipped add-building carrier corpus no longer supports the older filename-mismatch bias: + the checked report + `artifacts/exports/rt3-1.05/add-building-map-title-hints.json` + now scans the six bundled carrier maps in `artifacts/exports/rt3-1.05/add-building-compact-dispatch-corpus.json` - (`Alternate USA`, `Chicago to New York`, `Louisiana`, `Pacific Coastal`, - `Rhodes Unfinished`, `Texas Tea`) do not currently overlap the grounded - `0x00442c30` scenario-title set (`Go West!`, `Germany`, `France`, `State of Germany`, - `New Beginnings`, `Dutchlantis`, `Britain`, `New Zealand`, `South East Australia`, - `Tex-Mex`, `Germantown`, `The American`, `Central Pacific`, `Orient Express`). That biases the - remaining add-building source search away from the already-grounded title-fixup branch and - toward some other late bringup owner between `0x00433130` reload and final kind-`8` service. 
+ against the grounded `0x00442c30` title set (`Go West!`, `Germany`, `France`, + `State of Germany`, `New Beginnings`, `Dutchlantis`, `Britain`, `New Zealand`, + `South East Australia`, `Tex-Mex`, `Germantown`, `The American`, `Central Pacific`, + `Orient Express`). + - the new title-hint probe narrows that evidence precisely: + five of the six shipped carrier maps now show at least one grounded retagger-title hit, + but only one map currently shows an adjacent embedded `.gmp` reference plus grounded title and + only one shows a same-stem pair. `Louisiana.gmp` carries + `Dutchlantis.gmp` / `Dutchlantis` at offset `0x73d0` with zero byte distance (the title hit is the stem of the embedded reference string itself), while the other + current carrier-map hits stay weaker (`Alternate USA.gmp` late `Germany` / `France` / + `Britain`, `Chicago to New York.gmp` late `Germany` / `France`, + `Pacific Coastal.gmp` late `Central Pacific`, `Texas Tea.gmp` late `Germany`, + `Rhodes Unfinished.gmp` no current hit). + - that keeps the title-fixup branch alive but no longer as a broad filename-level explanation: + the evidence now supports a narrow “one strong `Louisiana -> Dutchlantis` overlap plus several + weaker prose-only or late-string overlaps” reading rather than a clean one-to-one mapping from + the shipped add-building carrier filenames to the grounded `0x00442c30` scenario-title set. + - the direct runtime-event comparison narrows it further too: + the checked note + `artifacts/exports/rt3-1.06/runtime-effect-kind8-title-overlap-note.md` + shows that `Louisiana.gmp` is the only carrier with a same-stem + `Dutchlantis.gmp` / `Dutchlantis` pair, but `Dutchlantis.gmp` itself still has no current + add-building dispatch rows while `Louisiana.gmp` keeps the one-row + `Add Building Warehouse05` cluster on + `nondirect-ge1e-h0001-0007-0000-5200-0200-p0000-0000-0000-ffff :: [7:0]`. So the strongest + current title overlap still does not reproduce the actual shipped add-building row family.
- the post-reload candidate set is checked in now too: `artifacts/exports/rt3-1.06/runtime-effect-kind8-post-reload-candidates.md` extracts the currently plausible late `0x00443a50` branches between ordinary reload and final kind-`8`