Classify zero-family stock alias clusters

This commit is contained in:
Jan Petykiewicz 2026-04-19 15:31:02 -07:00
commit b946e69bd0
4 changed files with 94 additions and 0 deletions

View file

@ -96,6 +96,7 @@ pub struct BuildingTypeRecoveredTableSummary {
pub nonzero_bty_header_name_0x40_summaries: Vec<BuildingTypeBtyHeaderNameSummary>,
pub nonzero_bty_header_name_0x5e_summaries: Vec<BuildingTypeBtyHeaderNameSummary>,
pub nonzero_bty_header_name_0x7c_summaries: Vec<BuildingTypeBtyHeaderNameSummary>,
pub bty_header_name_0x5e_dword_summaries: Vec<BuildingTypeBtyHeaderNameDwordSummary>,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
@ -114,6 +115,16 @@ pub struct BuildingTypeBtyHeaderNameSummary {
pub sample_file_names: Vec<String>,
}
/// One row of the per-lane header-name breakdown keyed by the probe's
/// `dword_0xbb` value, as built by `summarize_bty_header_name_lane_by_dword`.
///
/// Each row covers every source file whose trimmed header name and
/// `dword_0xbb` are both equal.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BuildingTypeBtyHeaderNameDwordSummary {
/// Header offset of the name lane, rendered as `0x`-prefixed lowercase hex
/// with at least two digits (for example `"0x5e"`).
pub header_offset_hex: String,
/// Whitespace-trimmed, non-empty name taken from the header probe for this lane.
pub header_value: String,
/// The probe's `dword_0xbb` value shared by every file in this row.
// NOTE(review): presumably the dword read at header offset 0xbb — confirm against the probe parser.
pub dword_0xbb: u32,
/// `dword_0xbb` rendered as `0x`-prefixed, zero-padded eight-digit lowercase hex.
pub dword_0xbb_hex: String,
/// Number of distinct file names in this row, counted before the sample list is capped.
pub file_count: usize,
/// Sorted, de-duplicated file names for this row, capped at the first 24 entries.
pub sample_file_names: Vec<String>,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BuildingTypeSourceReport {
pub directory_path: String,
@ -483,6 +494,8 @@ fn summarize_recovered_table_families(
summarize_nonzero_bty_header_name_lane(files, 0x5e, |probe| &probe.name_0x5e);
let nonzero_bty_header_name_0x7c_summaries =
summarize_nonzero_bty_header_name_lane(files, 0x7c, |probe| &probe.name_0x7c);
let bty_header_name_0x5e_dword_summaries =
summarize_bty_header_name_lane_by_dword(files, 0x5e, |probe| &probe.name_0x5e);
BuildingTypeRecoveredTableSummary {
recovered_style_themes: RECOVERED_STYLE_THEMES
@ -500,6 +513,7 @@ fn summarize_recovered_table_families(
nonzero_bty_header_name_0x40_summaries,
nonzero_bty_header_name_0x5e_summaries,
nonzero_bty_header_name_0x7c_summaries,
bty_header_name_0x5e_dword_summaries,
}
}
@ -549,6 +563,52 @@ fn summarize_nonzero_bty_header_name_lane(
summaries
}
/// Groups source files by the pair (trimmed header name, `dword_0xbb`) for one
/// header-name lane and returns one summary row per distinct pair.
///
/// * `files` - source files to scan; files without a header probe are ignored.
/// * `offset` - header offset of the lane, used only to label each row.
/// * `selector` - picks the lane's name string out of a header probe.
///
/// Names are whitespace-trimmed and files whose trimmed name is empty are
/// skipped. Each row reports the number of distinct file names and up to 24 of
/// them in sorted order. Rows are ordered by descending file count, then by
/// ascending `dword_0xbb`, offset label, and header value.
fn summarize_bty_header_name_lane_by_dword(
    files: &[BuildingTypeSourceFile],
    offset: u32,
    selector: impl Fn(&BuildingTypeBtyHeaderProbe) -> &String,
) -> Vec<BuildingTypeBtyHeaderNameDwordSummary> {
    // The offset label is identical for every row, so render it once.
    let offset_label = format!("0x{offset:02x}");

    // Bucket file names under their (name, dword) key.
    let mut names_by_key: BTreeMap<(String, u32), Vec<String>> = BTreeMap::new();
    let probed_files = files
        .iter()
        .filter_map(|file| file.bty_header_probe.as_ref().map(|probe| (file, probe)));
    for (file, probe) in probed_files {
        let name = selector(probe).trim();
        if !name.is_empty() {
            names_by_key
                .entry((name.to_owned(), probe.dword_0xbb))
                .or_default()
                .push(file.file_name.clone());
        }
    }

    let mut rows = Vec::with_capacity(names_by_key.len());
    for ((header_value, dword_0xbb), mut names) in names_by_key {
        names.sort();
        names.dedup();
        // Record the full distinct count before trimming the sample list.
        let file_count = names.len();
        names.truncate(24);
        rows.push(BuildingTypeBtyHeaderNameDwordSummary {
            header_offset_hex: offset_label.clone(),
            header_value,
            dword_0xbb,
            dword_0xbb_hex: format!("0x{dword_0xbb:08x}"),
            file_count,
            sample_file_names: names,
        });
    }

    rows.sort_by(|lhs, rhs| {
        // Largest groups first; ties fall back to ascending dword, offset, name.
        let by_count_desc = rhs.file_count.cmp(&lhs.file_count);
        by_count_desc.then_with(|| {
            (lhs.dword_0xbb, &lhs.header_offset_hex, &lhs.header_value).cmp(&(
                rhs.dword_0xbb,
                &rhs.header_offset_hex,
                &rhs.header_value,
            ))
        })
    });
    rows
}
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
struct BuildingBindingArtifact {
bindings: Vec<BuildingBindingRow>,
@ -731,5 +791,16 @@ mod tests {
sample_file_names: vec!["Port.bty".to_string()],
}]
);
assert_eq!(
summary.bty_header_name_0x5e_dword_summaries,
vec![BuildingTypeBtyHeaderNameDwordSummary {
header_offset_hex: "0x5e".to_string(),
header_value: "TextileMill".to_string(),
dword_0xbb: 0x01f4,
dword_0xbb_hex: "0x000001f4".to_string(),
file_count: 1,
sample_file_names: vec!["Port.bty".to_string()],
}]
);
}
}