use std::collections::{BTreeMap, BTreeSet}; use std::fs; use std::path::Path; use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] pub enum BuildingTypeSourceKind { Bca, Bty, } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct BuildingTypeSourceFile { pub file_name: String, pub raw_stem: String, pub canonical_stem: String, pub source_kind: BuildingTypeSourceKind, #[serde(default)] pub byte_len: Option, #[serde(default)] pub bca_selector_probe: Option, #[serde(default)] pub bty_header_probe: Option, } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct BuildingTypeSourceEntry { pub canonical_stem: String, pub raw_stems: Vec, pub source_kinds: Vec, pub file_names: Vec, } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct BuildingTypeBcaSelectorProbe { pub byte_0xb8: u8, pub byte_0xb8_hex: String, pub byte_0xb9: u8, pub byte_0xb9_hex: String, pub byte_0xba: u8, pub byte_0xba_hex: String, pub byte_0xbb: u8, pub byte_0xbb_hex: String, } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct BuildingTypeBtyHeaderProbe { pub type_id: u32, pub type_id_hex: String, pub name_0x04: String, pub name_0x22: String, pub name_0x40: String, pub name_0x5e: String, pub name_0x7c: String, pub name_0x9a: String, pub byte_0xb8: u8, pub byte_0xb8_hex: String, pub byte_0xb9: u8, pub byte_0xb9_hex: String, pub byte_0xba: u8, pub byte_0xba_hex: String, pub dword_0xbb: u32, pub dword_0xbb_hex: String, } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct BuildingTypeBcaSelectorPatternSummary { pub byte_len: usize, pub byte_0xb8_hex: String, pub byte_0xb9_hex: String, pub byte_0xba_hex: String, pub byte_0xbb_hex: String, pub file_count: usize, pub sample_file_names: Vec, } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct BuildingTypeNamedBindingComparison { pub bindings_path: String, pub named_binding_count: usize, pub shared_canonical_stem_count: usize, pub binding_only_canonical_stems: Vec, pub source_only_canonical_stems: Vec, } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct BuildingTypeRecoveredTableSummary { pub recovered_style_themes: Vec, pub recovered_source_kinds: Vec, pub present_style_station_entries: Vec, pub present_standalone_entries: Vec, pub bare_port_warehouse_files: Vec, pub nonzero_bty_header_dword_summaries: Vec, pub nonzero_bty_header_name_0x40_summaries: Vec, pub nonzero_bty_header_name_0x5e_summaries: Vec, pub nonzero_bty_header_name_0x7c_summaries: Vec, } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct BuildingTypeBtyHeaderDwordSummary { pub dword_0xbb: u32, pub dword_0xbb_hex: String, pub file_count: usize, pub sample_file_names: Vec, } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct BuildingTypeBtyHeaderNameSummary { pub header_offset_hex: String, pub header_value: String, pub file_count: usize, pub sample_file_names: Vec, } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct BuildingTypeSourceReport { pub directory_path: String, pub bca_file_count: usize, pub bty_file_count: usize, pub unique_canonical_stem_count: usize, pub bca_selector_pattern_count: usize, #[serde(default)] pub named_binding_comparison: Option, pub recovered_table_summary: BuildingTypeRecoveredTableSummary, pub notes: Vec, pub bca_selector_patterns: Vec, pub files: Vec, pub entries: Vec, } pub fn inspect_building_types_dir( path: &Path, ) -> Result> { inspect_building_types_dir_with_bindings(path, None) } pub fn inspect_building_types_dir_with_bindings( path: &Path, bindings_path: Option<&Path>, ) -> Result> { let mut files = Vec::new(); for entry in fs::read_dir(path)? { let entry = entry?; if !entry.file_type()?.is_file() { continue; } let file_name = entry.file_name().to_string_lossy().into_owned(); let Some(extension) = Path::new(&file_name) .extension() .and_then(|extension| extension.to_str()) .map(|extension| extension.to_ascii_lowercase()) else { continue; }; let source_kind = match extension.as_str() { "bca" => BuildingTypeSourceKind::Bca, "bty" => BuildingTypeSourceKind::Bty, _ => continue, }; let bytes = fs::read(entry.path())?; let raw_stem = Path::new(&file_name) .file_stem() .and_then(|stem| stem.to_str()) .unwrap_or("") .to_string(); if raw_stem.is_empty() { continue; } files.push(BuildingTypeSourceFile { file_name, canonical_stem: canonicalize_building_stem(&raw_stem), raw_stem, source_kind: source_kind.clone(), byte_len: Some(bytes.len()), bca_selector_probe: match source_kind { BuildingTypeSourceKind::Bca => Some(probe_bca_selector_bytes(&bytes)), BuildingTypeSourceKind::Bty => None, }, bty_header_probe: match source_kind { BuildingTypeSourceKind::Bca => None, BuildingTypeSourceKind::Bty => Some(probe_bty_header(&bytes)), }, }); } files.sort_by(|left, right| { left.canonical_stem .cmp(&right.canonical_stem) .then_with(|| left.file_name.cmp(&right.file_name)) }); let mut grouped = BTreeMap::>::new(); for file in &files { grouped .entry(file.canonical_stem.clone()) .or_default() .push(file); } let entries = grouped .into_iter() .map(|(canonical_stem, group)| BuildingTypeSourceEntry { canonical_stem, raw_stems: group .iter() .map(|file| file.raw_stem.clone()) .collect::>() .into_iter() .collect(), source_kinds: group .iter() .map(|file| file.source_kind.clone()) .collect::>() .into_iter() .collect(), file_names: group .iter() .map(|file| file.file_name.clone()) .collect::>() .into_iter() .collect(), }) .collect::>(); let bca_file_count = files .iter() .filter(|file| matches!(file.source_kind, BuildingTypeSourceKind::Bca)) .count(); let bty_file_count = files .iter() .filter(|file| matches!(file.source_kind, BuildingTypeSourceKind::Bty)) .count(); let mut grouped_selector_patterns = BTreeMap::<(usize, String, String, String, String), Vec>::new(); for file in &files { let Some(probe) = &file.bca_selector_probe else { continue; }; grouped_selector_patterns .entry(( file.byte_len.unwrap_or_default(), probe.byte_0xb8_hex.clone(), probe.byte_0xb9_hex.clone(), probe.byte_0xba_hex.clone(), probe.byte_0xbb_hex.clone(), )) .or_default() .push(file.file_name.clone()); } let bca_selector_patterns = grouped_selector_patterns .into_iter() .map( |( (byte_len, byte_0xb8_hex, byte_0xb9_hex, byte_0xba_hex, byte_0xbb_hex), file_names, )| BuildingTypeBcaSelectorPatternSummary { byte_len, byte_0xb8_hex, byte_0xb9_hex, byte_0xba_hex, byte_0xbb_hex, file_count: file_names.len(), sample_file_names: file_names.into_iter().take(12).collect(), }, ) .collect::>(); let notes = vec![ "BuildingTypes sources are grouped by a canonical stem that lowercases and strips spaces, underscores, and hyphens so paired .bca/.bty variants collapse onto one asset token.".to_string(), "This report is an offline asset-pool view only; it does not by itself assign live candidate ids or prove scenario candidate-table availability.".to_string(), "For .bca files, the report also exposes the narrow selector-byte window at offsets 0xb8..0xbb used by the grounded aux-candidate and live-candidate stream decoders.".to_string(), "The recovered stock table above the Tier-2 building seam combines one style/theme subset with one source-kind table; this report now surfaces the matching on-disk filename families directly.".to_string(), ]; let named_binding_comparison = if let Some(bindings_path) = bindings_path { Some(load_named_binding_comparison(bindings_path, &entries)?) } else { None }; let recovered_table_summary = summarize_recovered_table_families(&entries, &files); Ok(BuildingTypeSourceReport { directory_path: path.display().to_string(), bca_file_count, bty_file_count, unique_canonical_stem_count: entries.len(), bca_selector_pattern_count: bca_selector_patterns.len(), named_binding_comparison, recovered_table_summary, notes, bca_selector_patterns, files, entries, }) } fn probe_bca_selector_bytes(bytes: &[u8]) -> BuildingTypeBcaSelectorProbe { let byte_0xb8 = bytes.get(0xb8).copied().unwrap_or(0); let byte_0xb9 = bytes.get(0xb9).copied().unwrap_or(0); let byte_0xba = bytes.get(0xba).copied().unwrap_or(0); let byte_0xbb = bytes.get(0xbb).copied().unwrap_or(0); BuildingTypeBcaSelectorProbe { byte_0xb8, byte_0xb8_hex: format!("0x{byte_0xb8:02x}"), byte_0xb9, byte_0xb9_hex: format!("0x{byte_0xb9:02x}"), byte_0xba, byte_0xba_hex: format!("0x{byte_0xba:02x}"), byte_0xbb, byte_0xbb_hex: format!("0x{byte_0xbb:02x}"), } } fn probe_bty_header(bytes: &[u8]) -> BuildingTypeBtyHeaderProbe { let type_id = read_u32_le(bytes, 0x00); let byte_0xb8 = bytes.get(0xb8).copied().unwrap_or(0); let byte_0xb9 = bytes.get(0xb9).copied().unwrap_or(0); let byte_0xba = bytes.get(0xba).copied().unwrap_or(0); let dword_0xbb = read_u32_le(bytes, 0xbb); BuildingTypeBtyHeaderProbe { type_id, type_id_hex: format!("0x{type_id:08x}"), name_0x04: read_c_string(bytes, 0x04, 0x1e), name_0x22: read_c_string(bytes, 0x22, 0x1e), name_0x40: read_c_string(bytes, 0x40, 0x1e), name_0x5e: read_c_string(bytes, 0x5e, 0x1e), name_0x7c: read_c_string(bytes, 0x7c, 0x1e), name_0x9a: read_c_string(bytes, 0x9a, 0x1e), byte_0xb8, byte_0xb8_hex: format!("0x{byte_0xb8:02x}"), byte_0xb9, byte_0xb9_hex: format!("0x{byte_0xb9:02x}"), byte_0xba, byte_0xba_hex: format!("0x{byte_0xba:02x}"), dword_0xbb, dword_0xbb_hex: format!("0x{dword_0xbb:08x}"), } } fn read_u32_le(bytes: &[u8], offset: usize) -> u32 { bytes .get(offset..offset + 4) .and_then(|slice| <[u8; 4]>::try_from(slice).ok()) .map(u32::from_le_bytes) .unwrap_or(0) } fn read_c_string(bytes: &[u8], offset: usize, max_len: usize) -> String { let Some(slice) = bytes.get(offset..offset.saturating_add(max_len)) else { return String::new(); }; let end = slice .iter() .position(|byte| *byte == 0) .unwrap_or(slice.len()); String::from_utf8_lossy(&slice[..end]).into_owned() } fn load_named_binding_comparison( bindings_path: &Path, entries: &[BuildingTypeSourceEntry], ) -> Result> { let artifact = serde_json::from_str::(&fs::read_to_string(bindings_path)?)?; let named_binding_stems = artifact .bindings .into_iter() .filter_map(|binding| binding.candidate_name) .map(|candidate_name| canonicalize_building_stem(&candidate_name)) .collect::>(); let source_stems = entries .iter() .map(|entry| entry.canonical_stem.clone()) .collect::>(); Ok(BuildingTypeNamedBindingComparison { bindings_path: bindings_path.display().to_string(), named_binding_count: named_binding_stems.len(), shared_canonical_stem_count: named_binding_stems.intersection(&source_stems).count(), binding_only_canonical_stems: named_binding_stems .difference(&source_stems) .cloned() .collect(), source_only_canonical_stems: source_stems .difference(&named_binding_stems) .cloned() .collect(), }) } fn canonicalize_building_stem(stem: &str) -> String { stem.chars() .filter(|ch| !matches!(ch, ' ' | '_' | '-')) .flat_map(|ch| ch.to_lowercase()) .collect() } fn summarize_recovered_table_families( entries: &[BuildingTypeSourceEntry], files: &[BuildingTypeSourceFile], ) -> BuildingTypeRecoveredTableSummary { const RECOVERED_STYLE_THEMES: [&str; 6] = ["Victorian", "Tudor", "SoWest", "Persian", "Kyoto", "ClpBrd"]; const RECOVERED_SOURCE_KINDS: [&str; 5] = [ "StationSml", "StationMed", "StationLrg", "ServiceTower", "Maintenance", ]; let entry_by_canonical = entries .iter() .map(|entry| (entry.canonical_stem.clone(), entry)) .collect::>(); let mut present_style_station_entries = Vec::new(); for style in RECOVERED_STYLE_THEMES { for source_kind in ["StationSml", "StationMed", "StationLrg"] { let canonical = canonicalize_building_stem(&format!("{style}{source_kind}")); if let Some(entry) = entry_by_canonical.get(&canonical) { if let Some(raw_stem) = entry.raw_stems.first() { present_style_station_entries.push(raw_stem.clone()); } } } } present_style_station_entries.sort(); present_style_station_entries.dedup(); let mut present_standalone_entries = Vec::new(); for raw_name in ["ServiceTower", "Maintenance"] { let canonical = canonicalize_building_stem(raw_name); if let Some(entry) = entry_by_canonical.get(&canonical) { if let Some(raw_stem) = entry.raw_stems.first() { present_standalone_entries.push(raw_stem.clone()); } } } present_standalone_entries.sort(); present_standalone_entries.dedup(); let mut bare_port_warehouse_files = files .iter() .filter(|file| matches!(file.canonical_stem.as_str(), "port" | "warehouse")) .map(|file| file.file_name.clone()) .collect::>(); bare_port_warehouse_files.sort(); bare_port_warehouse_files.dedup(); let mut nonzero_bty_header_dword_groups = BTreeMap::>::new(); for file in files { let Some(probe) = &file.bty_header_probe else { continue; }; if probe.dword_0xbb == 0 { continue; } nonzero_bty_header_dword_groups .entry(probe.dword_0xbb) .or_default() .push(file.file_name.clone()); } let nonzero_bty_header_dword_summaries = nonzero_bty_header_dword_groups .into_iter() .map(|(dword_0xbb, mut file_names)| { file_names.sort(); file_names.dedup(); BuildingTypeBtyHeaderDwordSummary { dword_0xbb, dword_0xbb_hex: format!("0x{dword_0xbb:08x}"), file_count: file_names.len(), sample_file_names: file_names.into_iter().take(24).collect(), } }) .collect(); let nonzero_bty_header_name_0x40_summaries = summarize_nonzero_bty_header_name_lane(files, 0x40, |probe| &probe.name_0x40); let nonzero_bty_header_name_0x5e_summaries = summarize_nonzero_bty_header_name_lane(files, 0x5e, |probe| &probe.name_0x5e); let nonzero_bty_header_name_0x7c_summaries = summarize_nonzero_bty_header_name_lane(files, 0x7c, |probe| &probe.name_0x7c); BuildingTypeRecoveredTableSummary { recovered_style_themes: RECOVERED_STYLE_THEMES .into_iter() .map(str::to_string) .collect(), recovered_source_kinds: RECOVERED_SOURCE_KINDS .into_iter() .map(str::to_string) .collect(), present_style_station_entries, present_standalone_entries, bare_port_warehouse_files, nonzero_bty_header_dword_summaries, nonzero_bty_header_name_0x40_summaries, nonzero_bty_header_name_0x5e_summaries, nonzero_bty_header_name_0x7c_summaries, } } fn summarize_nonzero_bty_header_name_lane( files: &[BuildingTypeSourceFile], offset: u32, selector: impl Fn(&BuildingTypeBtyHeaderProbe) -> &String, ) -> Vec { let mut groups = BTreeMap::>::new(); for file in files { let Some(probe) = &file.bty_header_probe else { continue; }; if probe.dword_0xbb == 0 { continue; } let header_value = selector(probe).trim(); if header_value.is_empty() { continue; } groups .entry(header_value.to_string()) .or_default() .push(file.file_name.clone()); } let mut summaries = groups .into_iter() .map(|(header_value, mut file_names)| { file_names.sort(); file_names.dedup(); BuildingTypeBtyHeaderNameSummary { header_offset_hex: format!("0x{offset:02x}"), header_value, file_count: file_names.len(), sample_file_names: file_names.into_iter().take(24).collect(), } }) .collect::>(); summaries.sort_by(|left, right| { right .file_count .cmp(&left.file_count) .then_with(|| left.header_offset_hex.cmp(&right.header_offset_hex)) .then_with(|| left.header_value.cmp(&right.header_value)) }); summaries } #[derive(Debug, Clone, PartialEq, Eq, Deserialize)] struct BuildingBindingArtifact { bindings: Vec, } #[derive(Debug, Clone, PartialEq, Eq, Deserialize)] struct BuildingBindingRow { #[serde(default)] candidate_name: Option, } #[cfg(test)] mod tests { use super::*; #[test] fn probes_bca_selector_bytes_from_fixed_offsets() { let mut bytes = vec![0u8; 0xbc + 1]; bytes[0xb8] = 0x12; bytes[0xb9] = 0x34; bytes[0xba] = 0x56; bytes[0xbb] = 0x78; let probe = probe_bca_selector_bytes(&bytes); assert_eq!(probe.byte_0xb8, 0x12); assert_eq!(probe.byte_0xb9, 0x34); assert_eq!(probe.byte_0xba, 0x56); assert_eq!(probe.byte_0xbb, 0x78); assert_eq!(probe.byte_0xb8_hex, "0x12"); assert_eq!(probe.byte_0xbb_hex, "0x78"); } #[test] fn probes_bty_header_from_fixed_offsets() { let mut bytes = vec![0u8; 0xc0]; bytes[0x00..0x04].copy_from_slice(&0x03ebu32.to_le_bytes()); bytes[0x04..0x04 + 5].copy_from_slice(b"Port\0"); bytes[0x22..0x22 + 7].copy_from_slice(b"Cargo\0\0"); bytes[0x40..0x40 + 6].copy_from_slice(b"Dock\0\0"); bytes[0x5e..0x5e + 5].copy_from_slice(b"Sea\0\0"); bytes[0x7c..0x7c + 6].copy_from_slice(b"Coast\0"); bytes[0x9a..0x9a + 5].copy_from_slice(b"Port\0"); bytes[0xb8] = 0x12; bytes[0xb9] = 0x34; bytes[0xba] = 0x56; bytes[0xbb..0xbf].copy_from_slice(&0x89abcdefu32.to_le_bytes()); let probe = probe_bty_header(&bytes); assert_eq!(probe.type_id, 0x03eb); assert_eq!(probe.type_id_hex, "0x000003eb"); assert_eq!(probe.name_0x04, "Port"); assert_eq!(probe.name_0x22, "Cargo"); assert_eq!(probe.name_0x40, "Dock"); assert_eq!(probe.name_0x5e, "Sea"); assert_eq!(probe.name_0x7c, "Coast"); assert_eq!(probe.name_0x9a, "Port"); assert_eq!(probe.byte_0xb8_hex, "0x12"); assert_eq!(probe.byte_0xb9_hex, "0x34"); assert_eq!(probe.byte_0xba_hex, "0x56"); assert_eq!(probe.dword_0xbb_hex, "0x89abcdef"); } #[test] fn summarizes_recovered_table_families_from_entries_and_files() { let entries = vec![ BuildingTypeSourceEntry { canonical_stem: canonicalize_building_stem("VictorianStationSml"), raw_stems: vec!["VictorianStationSml".to_string()], source_kinds: vec![BuildingTypeSourceKind::Bty], file_names: vec!["VictorianStationSml.bty".to_string()], }, BuildingTypeSourceEntry { canonical_stem: canonicalize_building_stem("ClpBrdStationLrg"), raw_stems: vec!["ClpbrdStationLrg".to_string()], source_kinds: vec![BuildingTypeSourceKind::Bty], file_names: vec!["ClpbrdStationLrg.bty".to_string()], }, BuildingTypeSourceEntry { canonical_stem: canonicalize_building_stem("Maintenance"), raw_stems: vec!["Maintenance".to_string()], source_kinds: vec![BuildingTypeSourceKind::Bty], file_names: vec!["Maintenance.bty".to_string()], }, BuildingTypeSourceEntry { canonical_stem: canonicalize_building_stem("ServiceTower"), raw_stems: vec!["ServiceTower".to_string()], source_kinds: vec![BuildingTypeSourceKind::Bty], file_names: vec!["ServiceTower.bty".to_string()], }, ]; let files = vec![ BuildingTypeSourceFile { file_name: "Port.bty".to_string(), raw_stem: "Port".to_string(), canonical_stem: canonicalize_building_stem("Port"), source_kind: BuildingTypeSourceKind::Bty, byte_len: None, bca_selector_probe: None, bty_header_probe: Some(BuildingTypeBtyHeaderProbe { type_id: 0x03ec, type_id_hex: "0x000003ec".to_string(), name_0x04: "Port".to_string(), name_0x22: "Port".to_string(), name_0x40: "Port".to_string(), name_0x5e: "TextileMill".to_string(), name_0x7c: "Port".to_string(), name_0x9a: "Port".to_string(), byte_0xb8: 0x06, byte_0xb8_hex: "0x06".to_string(), byte_0xb9: 0x06, byte_0xb9_hex: "0x06".to_string(), byte_0xba: 0x30, byte_0xba_hex: "0x30".to_string(), dword_0xbb: 0x01f4, dword_0xbb_hex: "0x000001f4".to_string(), }), }, BuildingTypeSourceFile { file_name: "Warehouse.bca".to_string(), raw_stem: "Warehouse".to_string(), canonical_stem: canonicalize_building_stem("Warehouse"), source_kind: BuildingTypeSourceKind::Bca, byte_len: None, bca_selector_probe: None, bty_header_probe: None, }, ]; let summary = summarize_recovered_table_families(&entries, &files); assert!( summary .present_style_station_entries .contains(&"VictorianStationSml".to_string()) ); assert!( summary .present_style_station_entries .contains(&"ClpbrdStationLrg".to_string()) ); assert_eq!( summary.present_standalone_entries, vec!["Maintenance".to_string(), "ServiceTower".to_string()] ); assert_eq!( summary.bare_port_warehouse_files, vec!["Port.bty".to_string(), "Warehouse.bca".to_string()] ); assert_eq!(summary.nonzero_bty_header_dword_summaries.len(), 1); assert_eq!( summary.nonzero_bty_header_dword_summaries[0].dword_0xbb_hex, "0x000001f4" ); assert_eq!( summary.nonzero_bty_header_dword_summaries[0].sample_file_names, vec!["Port.bty".to_string()] ); assert_eq!( summary.nonzero_bty_header_name_0x40_summaries, vec![BuildingTypeBtyHeaderNameSummary { header_offset_hex: "0x40".to_string(), header_value: "Port".to_string(), file_count: 1, sample_file_names: vec!["Port.bty".to_string()], }] ); assert_eq!( summary.nonzero_bty_header_name_0x5e_summaries, vec![BuildingTypeBtyHeaderNameSummary { header_offset_hex: "0x5e".to_string(), header_value: "TextileMill".to_string(), file_count: 1, sample_file_names: vec!["Port.bty".to_string()], }] ); assert_eq!( summary.nonzero_bty_header_name_0x7c_summaries, vec![BuildingTypeBtyHeaderNameSummary { header_offset_hex: "0x7c".to_string(), header_value: "Port".to_string(), file_count: 1, sample_file_names: vec!["Port.bty".to_string()], }] ); } }