// Cargo source inspection: parses CargoTypes `.cty` files and cargoSkin `.dsc`
// descriptors stored in a PK4 archive, then compares the visible cargo names
// found in both sources against the named cargo-price and cargo-production
// descriptor row counts.
//
// NOTE(review): several generic argument lists (for example on `Option`,
// `Vec`, `Result`, `collect::` and `parse::`) look like they were stripped
// from this copy of the file. The code tokens below are left exactly as
// found; confirm against the canonical source before building.
use std::collections::BTreeSet;
use std::fs;
use std::path::Path;

use serde::{Deserialize, Serialize};

use crate::pk4::inspect_pk4_bytes;

/// Row count of the named cargo-price descriptor strip (the report notes
/// describe it as descriptors 106..176).
pub const NAMED_CARGO_PRICE_DESCRIPTOR_ROW_COUNT: usize = 71;
/// Row count of the named cargo-production descriptor strip (the report notes
/// describe it as descriptors 180..229).
pub const NAMED_CARGO_PRODUCTION_DESCRIPTOR_ROW_COUNT: usize = 50;
/// Header value that the inspection notes and the unit tests associate with
/// CargoTypes entries. The parser records the header it reads but does not
/// compare it against this constant.
pub const CARGO_TYPE_MAGIC: u32 = 0x0000_03ea;

/// A cargo name as read from a source file, plus its normalized form.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CargoNameToken {
    /// The token exactly as it appeared in the source.
    pub raw_name: String,
    /// `raw_name` with a leading `~<digits>` prefix removed when one is present.
    pub visible_name: String,
    /// Numeric id parsed from the `~<digits>` prefix, if any.
    #[serde(default)]
    pub localized_string_id: Option,
}

/// One parsed `.cty` file from a CargoTypes directory.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CargoTypeEntry {
    /// File name (without directory) the entry was read from.
    pub file_name: String,
    /// Total length of the file contents in bytes.
    pub file_size: usize,
    /// `file_size` rendered as `0x…` lowercase hex.
    pub file_size_hex: String,
    /// First four bytes of the file decoded as a little-endian `u32`.
    pub header_magic: u32,
    /// `header_magic` rendered as zero-padded 8-digit hex with a `0x` prefix.
    pub header_magic_hex: String,
    /// Cargo name read from the NUL-terminated string at byte offset 4.
    pub name: CargoNameToken,
}

/// Result of scanning a CargoTypes directory.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CargoTypeInspectionReport {
    /// Display form of the scanned directory path.
    pub directory_path: String,
    /// Number of parsed entries.
    pub entry_count: usize,
    /// Number of visible names counted across the entries.
    pub unique_visible_name_count: usize,
    /// Human-readable remarks about the format.
    pub notes: Vec,
    /// Parsed entries, sorted by visible name.
    pub entries: Vec,
}

/// One parsed `.dsc` descriptor payload from a cargoSkin PK4 archive.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CargoSkinDescriptorEntry {
    /// Name of the PK4 entry the payload came from.
    pub pk4_entry_name: String,
    /// Payload length in bytes.
    pub payload_len: usize,
    /// `payload_len` rendered as `0x…` lowercase hex.
    pub payload_len_hex: String,
    /// First non-empty (trimmed) line of the payload.
    pub descriptor_kind: String,
    /// Cargo name taken from the second non-empty (trimmed) line of the payload.
    pub name: CargoNameToken,
}

/// Result of scanning a cargoSkin PK4 archive.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CargoSkinInspectionReport {
    /// Display form of the PK4 file path.
    pub pk4_path: String,
    /// Number of parsed descriptor entries.
    pub entry_count: usize,
    /// Number of visible names counted across the entries.
    pub unique_visible_name_count: usize,
    /// Human-readable remarks about the format.
    pub notes: Vec,
    /// Parsed entries, sorted by visible name.
    pub entries: Vec,
}

/// Combined comparison of the CargoTypes and cargoSkin sources.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CargoEconomySourceReport {
    /// Directory path copied from the CargoTypes inspection report.
    pub cargo_types_dir: String,
    /// PK4 path copied from the cargoSkin inspection report.
    pub cargo_skin_pk4_path: String,
    /// Always [`NAMED_CARGO_PRICE_DESCRIPTOR_ROW_COUNT`].
    pub named_cargo_price_row_count: usize,
    /// Always [`NAMED_CARGO_PRODUCTION_DESCRIPTOR_ROW_COUNT`].
    pub named_cargo_production_row_count: usize,
    /// Entry count copied from the CargoTypes inspection report.
    pub cargo_type_count: usize,
    /// Entry count copied from the cargoSkin inspection report.
    pub cargo_skin_count: usize,
    /// Size of the intersection of the two visible-name sets.
    pub shared_visible_name_count: usize,
    /// Size of the union of the two visible-name sets.
    pub visible_name_union_count: usize,
    /// Visible names present in CargoTypes but not in cargoSkin.
    pub cargo_type_only_visible_names: Vec,
    /// Visible names present in cargoSkin but not in CargoTypes.
    pub cargo_skin_only_visible_names: Vec,
    /// Length of `live_registry_entries`.
    pub live_registry_count: usize,
    /// Merged per-visible-name registry built from both sources.
    pub live_registry_entries: Vec,
    /// Registry length minus the price row count, saturating at zero.
    pub price_selector_candidate_excess_count: usize,
    /// Registry names that do not appear in the production selector; empty when
    /// no production selector was built.
    pub price_selector_candidate_only_visible_names: Vec,
    /// Production selector built from bindings, when bindings were supplied.
    pub production_selector: Option,
    /// Candidate price selector derived from the merged registry.
    pub price_selector: CargoSelectorReport,
    /// Human-readable remarks about how the counts compare.
    pub notes: Vec,
    /// Entries moved out of the CargoTypes inspection report.
    pub cargo_type_entries: Vec,
    /// Entries moved out of the cargoSkin inspection report.
    pub cargo_skin_entries: Vec,
}

/// Which source a registry name was found in (serialized in snake_case).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CargoRegistrySourceKind {
    CargoTypes,
    CargoSkin,
}

/// Everything known about one visible cargo name across both sources.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CargoLiveRegistryEntry {
    /// The normalized name this entry is keyed on.
    pub visible_name: String,
    /// Raw names from either source that normalize to `visible_name`, sorted.
    pub raw_names: Vec,
    /// Localized string ids attached to those raw names.
    pub localized_string_ids: Vec,
    /// Sources that contain the name; CargoTypes is pushed before CargoSkin.
    pub source_kinds: Vec,
}

/// One row of a selector report.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CargoSelectorEntry {
    /// Binding index for production rows; 1-based registry position for price
    /// candidates.
    pub selector_index: usize,
    /// Descriptor id from the bindings; `None` for price candidates.
    #[serde(default)]
    pub descriptor_id: Option,
    /// Cargo name for this row.
    pub visible_name: String,
    /// Sources of the matching registry entry; empty when no entry matches.
    pub source_kinds: Vec,
}

/// A selector (production or price candidate) with its resolution status.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CargoSelectorReport {
    /// `"named_cargo_production"` or `"named_cargo_price_candidate_registry"`.
    pub selector_kind: String,
    /// True when the number of entries equals the expected descriptor row count.
    pub exact_resolution: bool,
    /// Expected descriptor row count for this selector kind.
    pub selector_row_count: usize,
    /// Number of entries in the merged registry the selector was built against.
    pub candidate_registry_count: usize,
    /// Human-readable remarks about the selector.
    pub notes: Vec,
    /// Selector rows.
    pub entries: Vec,
}

/// Top-level shape of the JSON bindings artifact read by `load_cargo_bindings`.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
struct CargoBindingArtifact {
    bindings: Vec,
}

/// One binding row from the bindings artifact.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
struct CargoBindingRow {
    descriptor_id: u32,
    /// Band label; only `"cargo_production_named"` rows feed the production selector.
    band: String,
    cargo_name: String,
    /// Ordering key used when building the production selector.
    binding_index: usize,
}

/// Scans `path` for `.cty` files and parses each one into a [`CargoTypeEntry`].
///
/// Directory entries that are not regular files, or whose extension is not
/// `cty` (compared ASCII case-insensitively), are skipped. The resulting
/// entries are sorted by visible name.
///
/// # Errors
///
/// Propagates failures from reading the directory, reading entry metadata,
/// reading file contents, or parsing an entry.
pub fn inspect_cargo_types_dir(
    path: &Path,
) -> Result> {
    let mut entries = Vec::new();
    for entry in fs::read_dir(path)? {
        let entry = entry?;
        if !entry.file_type()?.is_file() {
            continue;
        }
        let file_name = entry.file_name();
        // Lossy conversion: non-UTF-8 bytes in the name are replaced rather than rejected.
        let file_name = file_name.to_string_lossy().into_owned();
        if Path::new(&file_name)
            .extension()
            .and_then(|extension| extension.to_str())
            .map(|extension| extension.eq_ignore_ascii_case("cty"))
            != Some(true)
        {
            continue;
        }
        let bytes = fs::read(entry.path())?;
        entries.push(parse_cargo_type_entry(&file_name, &bytes)?);
    }
    entries.sort_by(|left, right| left.name.visible_name.cmp(&right.name.visible_name));

    let mut notes = Vec::new();
    notes.push(
        "CargoTypes entries carry a 0x03ea header and an inline NUL-terminated cargo token string."
            .to_string(),
    );
    notes.push(
        "A leading `~####` token is preserved as raw_name and normalized into visible_name by stripping the localized string id prefix."
            .to_string(),
    );

    // NOTE(review): the collection type is elided in this copy; presumably a
    // set, given the name of the resulting count.
    let unique_visible_name_count = entries
        .iter()
        .map(|entry| entry.name.visible_name.as_str())
        .collect::>()
        .len();

    Ok(CargoTypeInspectionReport {
        directory_path: path.display().to_string(),
        entry_count: entries.len(),
        unique_visible_name_count,
        notes,
        entries,
    })
}

/// Reads the PK4 archive at `path` and parses every entry whose extension is
/// `dsc` into a [`CargoSkinDescriptorEntry`].
///
/// The resulting entries are sorted by visible name.
///
/// # Errors
///
/// Propagates failures from reading the file, from `inspect_pk4_bytes`, and
/// from descriptor parsing. Also fails when an entry's payload range falls
/// outside the file contents.
pub fn inspect_cargo_skin_pk4(
    path: &Path,
) -> Result> {
    let bytes = fs::read(path)?;
    let inspection = inspect_pk4_bytes(&bytes)?;
    let mut entries = Vec::new();
    for entry in &inspection.entries {
        // NOTE(review): this comparison is case-sensitive, unlike the `cty`
        // check in `inspect_cargo_types_dir`; confirm whether the PK4
        // inspection normalizes extension case.
        if entry.extension.as_deref() != Some("dsc") {
            continue;
        }
        // Checked slice: an out-of-range payload becomes an error, not a panic.
        let payload = bytes
            .get(entry.payload_absolute_offset..entry.payload_end_offset)
            .ok_or_else(|| format!("pk4 payload range out of bounds for {}", entry.name))?;
        let parsed = parse_cargo_skin_descriptor_entry(&entry.name, payload)?;
        entries.push(parsed);
    }
    entries.sort_by(|left, right| left.name.visible_name.cmp(&right.name.visible_name));

    let mut notes = Vec::new();
    notes.push(
        "cargoSkin descriptors are parsed from .dsc payloads whose first non-empty line names the descriptor kind and whose second non-empty line carries the cargo token."
            .to_string(),
    );
    notes.push(
        "A leading `~####` token is preserved as raw_name and normalized into visible_name by stripping the localized string id prefix."
            .to_string(),
    );

    // NOTE(review): collection type elided in this copy; presumably a set.
    let unique_visible_name_count = entries
        .iter()
        .map(|entry| entry.name.visible_name.as_str())
        .collect::>()
        .len();

    Ok(CargoSkinInspectionReport {
        pk4_path: path.display().to_string(),
        entry_count: entries.len(),
        unique_visible_name_count,
        notes,
        entries,
    })
}

/// Builds the combined report without a bindings artifact.
///
/// Equivalent to calling [`inspect_cargo_economy_sources_with_bindings`] with
/// `None` as the bindings path.
pub fn inspect_cargo_economy_sources(
    cargo_types_dir: &Path,
    cargo_skin_pk4_path: &Path,
) -> Result> {
    inspect_cargo_economy_sources_with_bindings(cargo_types_dir, cargo_skin_pk4_path, None)
}

/// Inspects both cargo sources, optionally loads a bindings artifact, and
/// builds the combined [`CargoEconomySourceReport`].
///
/// # Errors
///
/// Propagates any failure from the two inspections or from loading the
/// bindings file.
pub fn inspect_cargo_economy_sources_with_bindings(
    cargo_types_dir: &Path,
    cargo_skin_pk4_path: &Path,
    cargo_bindings_path: Option<&Path>,
) -> Result> {
    let cargo_types = inspect_cargo_types_dir(cargo_types_dir)?;
    let cargo_skins = inspect_cargo_skin_pk4(cargo_skin_pk4_path)?;
    let cargo_bindings = load_cargo_bindings(cargo_bindings_path)?;
    Ok(build_cargo_economy_source_report(
        cargo_types,
        cargo_skins,
        cargo_bindings.as_deref(),
    ))
}

/// Combines the two inspection reports (and optional binding rows) into a
/// [`CargoEconomySourceReport`].
///
/// Computes set statistics over the visible names of both sources, builds the
/// merged registry, the optional production selector and the candidate price
/// selector, and records explanatory notes. Consumes both input reports.
fn build_cargo_economy_source_report(
    cargo_types: CargoTypeInspectionReport,
    cargo_skins: CargoSkinInspectionReport,
    cargo_bindings: Option<&[CargoBindingRow]>,
) -> CargoEconomySourceReport {
    // The `intersection`/`union`/`difference` calls below show that these two
    // collections are sets of owned visible names.
    let cargo_type_visible_names = cargo_types
        .entries
        .iter()
        .map(|entry| entry.name.visible_name.clone())
        .collect::>();
    let cargo_skin_visible_names = cargo_skins
        .entries
        .iter()
        .map(|entry| entry.name.visible_name.clone())
        .collect::>();

    let shared_visible_name_count = cargo_type_visible_names
        .intersection(&cargo_skin_visible_names)
        .count();
    let visible_name_union_count = cargo_type_visible_names
        .union(&cargo_skin_visible_names)
        .count();
    let cargo_type_only_visible_names = cargo_type_visible_names
        .difference(&cargo_skin_visible_names)
        .cloned()
        .collect::>();
    let cargo_skin_only_visible_names = cargo_skin_visible_names
        .difference(&cargo_type_visible_names)
        .cloned()
        .collect::>();
    let live_registry_entries =
        build_live_registry_entries(&cargo_types.entries, &cargo_skins.entries);
    // Only built when binding rows were supplied.
    let production_selector = cargo_bindings.map(|bindings| build_production_selector(bindings, &live_registry_entries));
    // Registry names that the production selector does not mention; stays
    // empty when there is no production selector.
    let price_selector_candidate_only_visible_names = production_selector
        .as_ref()
        .map(|selector| {
            let selector_names = selector
                .entries
                .iter()
                .map(|entry| entry.visible_name.as_str())
                .collect::>();
            live_registry_entries
                .iter()
                .filter(|entry| !selector_names.contains(entry.visible_name.as_str()))
                .map(|entry| entry.visible_name.clone())
                .collect::>()
        })
        .unwrap_or_default();
    let price_selector = build_price_selector(&live_registry_entries);
    // Saturating: a registry smaller than the price strip yields 0, not underflow.
    let price_selector_candidate_excess_count = live_registry_entries
        .len()
        .saturating_sub(NAMED_CARGO_PRICE_DESCRIPTOR_ROW_COUNT);

    let mut notes = Vec::new();
    notes.push(format!(
        "Named cargo-price descriptors 106..176 span {} rows, while named cargo-production descriptors 180..229 span {} rows.",
        NAMED_CARGO_PRICE_DESCRIPTOR_ROW_COUNT,
        NAMED_CARGO_PRODUCTION_DESCRIPTOR_ROW_COUNT
    ));
    notes.push(format!(
        "The inspected CargoTypes corpus exposes {} visible names, the inspected cargoSkin corpus exposes {} visible names, and their union exposes {} visible names.",
        cargo_types.unique_visible_name_count,
        cargo_skins.unique_visible_name_count,
        visible_name_union_count
    ));
    if visible_name_union_count != NAMED_CARGO_PRICE_DESCRIPTOR_ROW_COUNT {
        notes.push(
            "That visible-name union still does not match the 71-row named cargo-price strip, so this offline source reconstruction is groundwork rather than a complete price-selector binding."
                .to_string(),
        );
    } else {
        notes.push(
            "The visible-name union matches the 71-row named cargo-price strip; a later pass can decide whether ordering evidence is now strong enough for descriptor bindings."
                .to_string(),
        );
    }
    if cargo_types.unique_visible_name_count == NAMED_CARGO_PRODUCTION_DESCRIPTOR_ROW_COUNT {
        notes.push(
            "The CargoTypes corpus still matches the 50-row named cargo-production strip cardinality that grounds the current production bindings."
                .to_string(),
        );
    }

    CargoEconomySourceReport {
        cargo_types_dir: cargo_types.directory_path,
        cargo_skin_pk4_path: cargo_skins.pk4_path,
        named_cargo_price_row_count: NAMED_CARGO_PRICE_DESCRIPTOR_ROW_COUNT,
        named_cargo_production_row_count: NAMED_CARGO_PRODUCTION_DESCRIPTOR_ROW_COUNT,
        cargo_type_count: cargo_types.entry_count,
        cargo_skin_count: cargo_skins.entry_count,
        shared_visible_name_count,
        visible_name_union_count,
        cargo_type_only_visible_names,
        cargo_skin_only_visible_names,
        live_registry_count: live_registry_entries.len(),
        live_registry_entries,
        price_selector_candidate_excess_count,
        price_selector_candidate_only_visible_names,
        production_selector,
        price_selector,
        notes,
        cargo_type_entries: cargo_types.entries,
        cargo_skin_entries: cargo_skins.entries,
    }
}

/// Builds one [`CargoLiveRegistryEntry`] per visible name found in either
/// source, in sorted name order.
///
/// For each name it gathers the matching raw names and localized string ids
/// from both sources and records which sources contain it. Each name is
/// looked up by rescanning both input slices.
fn build_live_registry_entries(
    cargo_type_entries: &[CargoTypeEntry],
    cargo_skin_entries: &[CargoSkinDescriptorEntry],
) -> Vec {
    // NOTE(review): the intermediate collection type is elided in this copy.
    // The unit tests expect duplicates across sources to collapse, so it is
    // presumably a set; if it is an ordered set, the `sort()` calls below are
    // redundant.
    let mut visible_names = cargo_type_entries
        .iter()
        .map(|entry| entry.name.visible_name.clone())
        .chain(
            cargo_skin_entries
                .iter()
                .map(|entry| entry.name.visible_name.clone()),
        )
        .collect::>()
        .into_iter()
        .collect::>();
    visible_names.sort();

    visible_names
        .into_iter()
        .map(|visible_name| {
            let mut raw_names = cargo_type_entries
                .iter()
                .filter(|entry| entry.name.visible_name == visible_name)
                .map(|entry| entry.name.raw_name.clone())
                .chain(
                    cargo_skin_entries
                        .iter()
                        .filter(|entry| entry.name.visible_name == visible_name)
                        .map(|entry| entry.name.raw_name.clone()),
                )
                .collect::>()
                .into_iter()
                .collect::>();
            raw_names.sort();
            // Entries without a localized string id contribute nothing here.
            let localized_string_ids = cargo_type_entries
                .iter()
                .filter(|entry| entry.name.visible_name == visible_name)
                .filter_map(|entry| entry.name.localized_string_id)
                .chain(
                    cargo_skin_entries
                        .iter()
                        .filter(|entry| entry.name.visible_name == visible_name)
                        .filter_map(|entry| entry.name.localized_string_id),
                )
                .collect::>()
                .into_iter()
                .collect::>();
            // Push order is fixed: CargoTypes first, then CargoSkin.
            let mut source_kinds = Vec::new();
            if cargo_type_entries
                .iter()
                .any(|entry| entry.name.visible_name == visible_name)
            {
                source_kinds.push(CargoRegistrySourceKind::CargoTypes);
            }
            if cargo_skin_entries
                .iter()
                .any(|entry| entry.name.visible_name == visible_name)
            {
                source_kinds.push(CargoRegistrySourceKind::CargoSkin);
            }
            CargoLiveRegistryEntry {
                visible_name,
                raw_names,
                localized_string_ids,
                source_kinds,
            }
        })
        .collect()
}

/// Builds the production selector from the binding rows whose band is
/// `"cargo_production_named"`, ordered by `binding_index`.
///
/// Each row's `source_kinds` is copied from the registry entry with the same
/// visible name, or left empty when no entry matches.
fn build_production_selector(
    bindings: &[CargoBindingRow],
    live_registry_entries: &[CargoLiveRegistryEntry],
) -> CargoSelectorReport {
    let mut rows = bindings
        .iter()
        .filter(|binding| binding.band == "cargo_production_named")
        .collect::>();
    rows.sort_by_key(|binding| binding.binding_index);
    let entries = rows
        .into_iter()
        .map(|binding| CargoSelectorEntry {
            selector_index: binding.binding_index,
            descriptor_id: Some(binding.descriptor_id),
            visible_name: binding.cargo_name.clone(),
            source_kinds: live_registry_entries
                .iter()
                .find(|entry| entry.visible_name == binding.cargo_name)
                .map(|entry| entry.source_kinds.clone())
                .unwrap_or_default(),
        })
        .collect::>();

    // NOTE(review): the second note below is emitted unconditionally, even
    // when `exact_resolution` is false.
    CargoSelectorReport {
        selector_kind: "named_cargo_production".to_string(),
        exact_resolution: entries.len() == NAMED_CARGO_PRODUCTION_DESCRIPTOR_ROW_COUNT,
        selector_row_count: NAMED_CARGO_PRODUCTION_DESCRIPTOR_ROW_COUNT,
        candidate_registry_count: live_registry_entries.len(),
        notes: vec![
            "This selector is grounded from the checked-in named cargo production bindings artifact."
                .to_string(),
            "The current grounded order matches the 50-row named cargo-production descriptor strip."
                .to_string(),
        ],
        entries,
    }
}

/// Builds the candidate price selector: one row per registry entry, numbered
/// from 1 in registry order, with no descriptor id.
///
/// `exact_resolution` is true only when the registry length equals
/// [`NAMED_CARGO_PRICE_DESCRIPTOR_ROW_COUNT`].
fn build_price_selector(live_registry_entries: &[CargoLiveRegistryEntry]) -> CargoSelectorReport {
    let entries = live_registry_entries
        .iter()
        .enumerate()
        .map(|(index, entry)| CargoSelectorEntry {
            selector_index: index + 1,
            descriptor_id: None,
            visible_name: entry.visible_name.clone(),
            source_kinds: entry.source_kinds.clone(),
        })
        .collect::>();
    let candidate_registry_count = entries.len();
    let exact_resolution = candidate_registry_count == NAMED_CARGO_PRICE_DESCRIPTOR_ROW_COUNT;
    let mut notes = Vec::new();
    notes.push(
        "This is the current merged visible-name registry, sorted lexicographically, not a claimed reproduction of the original price selector."
            .to_string(),
    );
    if exact_resolution {
        notes.push(
            "The merged visible-name registry cardinality matches the 71-row named cargo-price descriptor strip."
                .to_string(),
        );
    } else {
        notes.push(format!(
            "The merged visible-name registry has {} entries, so the exact 71-row price-selector binding remains unresolved by static source recovery alone.",
            candidate_registry_count
        ));
        // NOTE(review): `saturating_sub` yields 0 when the registry is smaller
        // than the strip, so the note below then reports "0 excess" for what
        // is actually a shortfall.
        let excess = candidate_registry_count.saturating_sub(NAMED_CARGO_PRICE_DESCRIPTOR_ROW_COUNT);
        notes.push(format!(
            "Current unresolved gap is {} excess candidate names relative to the descriptor strip.",
            excess
        ));
    }

    CargoSelectorReport {
        selector_kind: "named_cargo_price_candidate_registry".to_string(),
        exact_resolution,
        selector_row_count: NAMED_CARGO_PRICE_DESCRIPTOR_ROW_COUNT,
        candidate_registry_count,
        notes,
        entries,
    }
}

/// Loads binding rows from the JSON artifact at `path`.
///
/// Returns `Ok(None)` when no path is given or when the path does not exist;
/// a missing artifact is treated as "no bindings" rather than as an error.
///
/// # Errors
///
/// Fails when the file cannot be read or when its contents do not deserialize
/// into a `CargoBindingArtifact`.
fn load_cargo_bindings(
    path: Option<&Path>,
) -> Result>, Box> {
    let Some(path) = path else {
        return Ok(None);
    };
    if !path.exists() {
        return Ok(None);
    }
    let artifact: CargoBindingArtifact = serde_json::from_str(&fs::read_to_string(path)?)?;
    Ok(Some(artifact.bindings))
}

/// Parses the bytes of one `.cty` file.
///
/// Layout read here: a 4-byte little-endian header value followed by a
/// NUL-terminated UTF-8 name starting at byte offset 4. The header value is
/// recorded but not validated.
///
/// # Errors
///
/// Fails when the input is shorter than 5 bytes, or when no NUL-terminated
/// UTF-8 string is found at offset 4.
fn parse_cargo_type_entry(
    file_name: &str,
    bytes: &[u8],
) -> Result> {
    if bytes.len() < 5 {
        return Err(format!("cargo type entry {file_name} is too short").into());
    }
    // The length check above guarantees `bytes[0..4]` exists, so the
    // conversion to a 4-byte array cannot fail.
    let header_magic =
        u32::from_le_bytes(bytes[0..4].try_into().expect("length checked"));
    let raw_name = parse_nul_terminated_utf8(bytes, 4)
        .ok_or_else(|| format!("cargo type entry {file_name} is missing a NUL-terminated name"))?;
    Ok(CargoTypeEntry {
        file_name: file_name.to_string(),
        file_size: bytes.len(),
        file_size_hex: format!("0x{:x}", bytes.len()),
        header_magic,
        header_magic_hex: format!("0x{header_magic:08x}"),
        name: parse_cargo_name_token(&raw_name),
    })
}

/// Parses one `.dsc` payload as UTF-8 text.
///
/// Lines are trimmed and empty lines are ignored; the first remaining line is
/// the descriptor kind and the second is the cargo name token.
///
/// # Errors
///
/// Fails when the payload is not valid UTF-8 or when it has fewer than two
/// non-empty lines.
fn parse_cargo_skin_descriptor_entry(
    entry_name: &str,
    bytes: &[u8],
) -> Result> {
    let text = std::str::from_utf8(bytes)?;
    let mut lines = text.lines().map(str::trim).filter(|line| !line.is_empty());
    let descriptor_kind = lines
        .next()
        .ok_or_else(|| format!("cargo skin descriptor {entry_name} is missing the kind line"))?;
    let raw_name = lines
        .next()
        .ok_or_else(|| format!("cargo skin descriptor {entry_name} is missing the name line"))?;
    Ok(CargoSkinDescriptorEntry {
        pk4_entry_name: entry_name.to_string(),
        payload_len: bytes.len(),
        payload_len_hex: format!("0x{:x}", bytes.len()),
        descriptor_kind: descriptor_kind.to_string(),
        name: parse_cargo_name_token(raw_name),
    })
}

/// Reads a NUL-terminated UTF-8 string starting at `offset`.
///
/// Returns `None` when `offset` is past the end of `bytes`, when no NUL byte
/// follows, or when the bytes before the NUL are not valid UTF-8. The NUL
/// itself is not included in the result.
fn parse_nul_terminated_utf8(bytes: &[u8], offset: usize) -> Option {
    let tail = bytes.get(offset..)?;
    let end = tail.iter().position(|byte| *byte == 0)?;
    String::from_utf8(tail[..end].to_vec()).ok()
}

/// Splits a raw cargo token into its optional localized-string id and its
/// visible name.
///
/// When `raw_name` starts with `~` followed by at least one ASCII digit, the
/// digits are parsed as the id and the rest becomes the visible name;
/// otherwise the visible name equals `raw_name` and the id is `None`.
fn parse_cargo_name_token(raw_name: &str) -> CargoNameToken {
    let mut visible_name = raw_name.to_string();
    let mut localized_string_id = None;
    if let Some(rest) = raw_name.strip_prefix('~') {
        let digits = rest
            .chars()
            .take_while(|character| character.is_ascii_digit())
            .collect::();
        if !digits.is_empty() {
            // A failed parse leaves the id as `None`, but the prefix is still stripped.
            localized_string_id = digits.parse::().ok();
            // `digits` is ASCII-only, so its byte length is a valid char boundary in `rest`.
            visible_name = rest[digits.len()..].to_string();
        }
    }
    CargoNameToken {
        raw_name: raw_name.to_string(),
        visible_name,
        localized_string_id,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // A name without a `~` prefix passes through unchanged and has no id.
    #[test]
    fn parses_plain_cargo_type_entry() {
        let mut bytes = Vec::new();
        bytes.extend_from_slice(&CARGO_TYPE_MAGIC.to_le_bytes());
        bytes.extend_from_slice(b"Alcohol\0");
        let entry = parse_cargo_type_entry("Alcohol.cty", &bytes).expect("entry should parse");
        assert_eq!(entry.header_magic, CARGO_TYPE_MAGIC);
        assert_eq!(entry.name.raw_name, "Alcohol");
        assert_eq!(entry.name.visible_name, "Alcohol");
        assert_eq!(entry.name.localized_string_id, None);
    }

    // A `~<digits>` prefix is split off into the localized string id.
    #[test]
    fn parses_localized_cargo_type_entry() {
        let mut bytes = Vec::new();
        bytes.extend_from_slice(&CARGO_TYPE_MAGIC.to_le_bytes());
        bytes.extend_from_slice(b"~4465Ceramics\0");
        let entry =
            parse_cargo_type_entry("~4465Ceramics.cty", &bytes).expect("entry should parse");
        assert_eq!(entry.name.raw_name, "~4465Ceramics");
        assert_eq!(entry.name.visible_name, "Ceramics");
        assert_eq!(entry.name.localized_string_id, Some(4465));
    }

    // CRLF line endings are handled by trimming each line.
    #[test]
    fn parses_cargo_skin_descriptor_entry() {
        let entry = parse_cargo_skin_descriptor_entry("Alcohol.dsc", b"cargoSkin\r\nAlcohol\r\n")
            .expect("descriptor should parse");
        assert_eq!(entry.descriptor_kind, "cargoSkin");
        assert_eq!(entry.name.visible_name, "Alcohol");
    }

    // Without bindings: one shared name, one name unique to each source, and
    // no production selector.
    #[test]
    fn builds_cargo_source_report_union_counts() {
        let cargo_types = CargoTypeInspectionReport {
            directory_path: "CargoTypes".to_string(),
            entry_count: 2,
            unique_visible_name_count: 2,
            notes: Vec::new(),
            entries: vec![
                CargoTypeEntry {
                    file_name: "Alcohol.cty".to_string(),
                    file_size: 16,
                    file_size_hex: "0x10".to_string(),
                    header_magic: CARGO_TYPE_MAGIC,
                    header_magic_hex: format!("0x{CARGO_TYPE_MAGIC:08x}"),
                    name: parse_cargo_name_token("Alcohol"),
                },
                CargoTypeEntry {
                    file_name: "Coal.cty".to_string(),
                    file_size: 16,
                    file_size_hex: "0x10".to_string(),
                    header_magic: CARGO_TYPE_MAGIC,
                    header_magic_hex: format!("0x{CARGO_TYPE_MAGIC:08x}"),
                    name: parse_cargo_name_token("Coal"),
                },
            ],
        };
        let cargo_skins = CargoSkinInspectionReport {
            pk4_path: "Cargo106.PK4".to_string(),
            entry_count: 2,
            unique_visible_name_count: 2,
            notes: Vec::new(),
            entries: vec![
                CargoSkinDescriptorEntry {
                    pk4_entry_name: "Alcohol.dsc".to_string(),
                    payload_len: 20,
                    payload_len_hex: "0x14".to_string(),
                    descriptor_kind: "cargoSkin".to_string(),
                    name: parse_cargo_name_token("Alcohol"),
                },
                CargoSkinDescriptorEntry {
                    pk4_entry_name: "Beer.dsc".to_string(),
                    payload_len: 16,
                    payload_len_hex: "0x10".to_string(),
                    descriptor_kind: "cargoSkin".to_string(),
                    name: parse_cargo_name_token("Beer"),
                },
            ],
        };

        let report = build_cargo_economy_source_report(cargo_types, cargo_skins, None);
        assert_eq!(report.shared_visible_name_count, 1);
        assert_eq!(report.visible_name_union_count, 3);
        assert_eq!(report.live_registry_count, 3);
        assert_eq!(
            report.cargo_type_only_visible_names,
            vec!["Coal".to_string()]
        );
        assert_eq!(
            report.cargo_skin_only_visible_names,
            vec!["Beer".to_string()]
        );
        assert!(!report.price_selector.exact_resolution);
        assert_eq!(report.price_selector.candidate_registry_count, 3);
        assert_eq!(report.price_selector_candidate_excess_count, 0);
        assert!(
            report
                .price_selector_candidate_only_visible_names
                .is_empty()
        );
        assert!(report.production_selector.is_none());
    }

    // With bindings: selector rows follow binding order and pick up their
    // source kinds from the merged registry.
    #[test]
    fn builds_exact_production_selector_from_bindings() {
        let cargo_types = CargoTypeInspectionReport {
            directory_path: "CargoTypes".to_string(),
            entry_count: 2,
            unique_visible_name_count: 2,
            notes: Vec::new(),
            entries: vec![
                CargoTypeEntry {
                    file_name: "Alcohol.cty".to_string(),
                    file_size: 16,
                    file_size_hex: "0x10".to_string(),
                    header_magic: CARGO_TYPE_MAGIC,
                    header_magic_hex: format!("0x{CARGO_TYPE_MAGIC:08x}"),
                    name: parse_cargo_name_token("Alcohol"),
                },
                CargoTypeEntry {
                    file_name: "Coal.cty".to_string(),
                    file_size: 16,
                    file_size_hex: "0x10".to_string(),
                    header_magic: CARGO_TYPE_MAGIC,
                    header_magic_hex: format!("0x{CARGO_TYPE_MAGIC:08x}"),
                    name: parse_cargo_name_token("Coal"),
                },
            ],
        };
        let cargo_skins = CargoSkinInspectionReport {
            pk4_path: "Cargo106.PK4".to_string(),
            entry_count: 1,
            unique_visible_name_count: 1,
            notes: Vec::new(),
            entries: vec![CargoSkinDescriptorEntry {
                pk4_entry_name: "Alcohol.dsc".to_string(),
                payload_len: 20,
                payload_len_hex: "0x14".to_string(),
                descriptor_kind: "cargoSkin".to_string(),
                name: parse_cargo_name_token("Alcohol"),
            }],
        };
        let bindings = vec![
            CargoBindingRow {
                descriptor_id: 180,
                band: "cargo_production_named".to_string(),
                cargo_name: "Alcohol".to_string(),
                binding_index: 1,
            },
            CargoBindingRow {
                descriptor_id: 181,
                band: "cargo_production_named".to_string(),
                cargo_name: "Coal".to_string(),
                binding_index: 2,
            },
        ];

        let report = build_cargo_economy_source_report(cargo_types, cargo_skins, Some(&bindings));
        let selector = report
            .production_selector
            .expect("production selector should exist");
        assert_eq!(selector.entries.len(), 2);
        assert_eq!(selector.entries[0].descriptor_id, Some(180));
        assert_eq!(selector.entries[0].visible_name, "Alcohol");
        assert_eq!(
            selector.entries[0].source_kinds,
            vec![
                CargoRegistrySourceKind::CargoTypes,
                CargoRegistrySourceKind::CargoSkin
            ]
        );
        assert_eq!(selector.entries[1].visible_name, "Coal");
        assert!(
            report
                .price_selector_candidate_only_visible_names
                .is_empty()
        );
        assert_eq!(report.price_selector_candidate_excess_count, 0);
    }
}