Classify infrastructure short-span outlier families

This commit is contained in:
Jan Petykiewicz 2026-04-18 14:48:00 -07:00
commit 1a0653cff1
3 changed files with 446 additions and 0 deletions

View file

@ -1953,9 +1953,95 @@ pub struct SmpSavePlacedStructureDynamicSideBufferPayloadEnvelopeSummary {
pub name_prelude_candidate_summary: pub name_prelude_candidate_summary:
Option<SmpSavePlacedStructureDynamicSideBufferNamePreludeCandidateSummary>, Option<SmpSavePlacedStructureDynamicSideBufferNamePreludeCandidateSummary>,
#[serde(default)] #[serde(default)]
pub dominant_profile_span_class_summary:
Option<SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanClassSummary>,
#[serde(default)]
pub sample_rows: Vec<SmpSavePlacedStructureDynamicSideBufferPayloadEnvelopeSample>, pub sample_rows: Vec<SmpSavePlacedStructureDynamicSideBufferPayloadEnvelopeSample>,
} }
/// Breakdown of the embedded name rows whose post-profile span length equals
/// the dominant `profile_chunk_len_to_next_name_or_end` value.
///
/// The parser builds this by filtering the name/envelope row pairs down to the
/// dominant span length and then counting name pairs, compact prefixes and
/// prelude candidate patterns inside that subset. Every `*_hex` field is the
/// lower-case, zero-padded `0x…` rendering of its numeric sibling.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanClassSummary {
/// Span length shared by every row counted in this summary.
pub profile_chunk_len_to_next_name_or_end: usize,
/// Number of rows that have the span length above.
pub row_count: usize,
/// Number of distinct `(primary_name, secondary_name)` pairs among those rows.
pub unique_name_pair_count: usize,
/// Number of distinct `(leading dword, trailing word, separator byte)` prefixes
/// among those rows.
pub unique_compact_prefix_pattern_count: usize,
/// Most frequent `(child_count_candidate, saved_primary_child_byte_candidate)`
/// pattern; `None` when no row yielded both candidate values.
#[serde(default)]
pub dominant_candidate_pattern:
Option<SmpSavePlacedStructureDynamicSideBufferNamePreludeCandidatePattern>,
/// Primary name of the most frequent name pair (may itself be absent).
#[serde(default)]
pub dominant_primary_name: Option<String>,
/// Secondary name of the most frequent name pair (may itself be absent).
#[serde(default)]
pub dominant_secondary_name: Option<String>,
/// Row count of the most frequent name pair; `0` when there is none.
pub dominant_name_pair_count: usize,
/// Leading dword of the most frequent compact prefix.
#[serde(default)]
pub dominant_prefix_leading_dword: Option<u32>,
/// `0x%08x` rendering of `dominant_prefix_leading_dword`.
#[serde(default)]
pub dominant_prefix_leading_dword_hex: Option<String>,
/// Trailing word of the most frequent compact prefix.
#[serde(default)]
pub dominant_prefix_trailing_word: Option<u16>,
/// `0x%04x` rendering of `dominant_prefix_trailing_word`.
#[serde(default)]
pub dominant_prefix_trailing_word_hex: Option<String>,
/// Separator byte of the most frequent compact prefix.
#[serde(default)]
pub dominant_prefix_separator_byte: Option<u8>,
/// `0x%02x` rendering of `dominant_prefix_separator_byte`.
#[serde(default)]
pub dominant_prefix_separator_byte_hex: Option<String>,
/// Row count of the most frequent compact prefix; `0` when there is none.
pub dominant_prefix_count: usize,
/// The first rows (at most 8) of the dominant class, in encounter order.
#[serde(default)]
pub sample_rows: Vec<SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanSample>,
/// Per-name-pair row counts, capped at 8 entries in ascending key order
/// (not ordered by count).
#[serde(default)]
pub name_pair_summaries: Vec<SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanNamePairSummary>,
/// Per-prefix row counts, capped at 8 entries in ascending key order
/// (not ordered by count).
#[serde(default)]
pub compact_prefix_pattern_summaries:
Vec<SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanPrefixSummary>,
/// Per-candidate-pattern row counts, capped at 8 entries in ascending key
/// order (not ordered by count).
#[serde(default)]
pub candidate_pattern_summaries:
Vec<SmpSavePlacedStructureDynamicSideBufferNamePreludeCandidatePattern>,
}
/// One sampled row from the dominant post-profile span class.
///
/// Every `*_hex` field is the lower-case, zero-padded `0x…` rendering of its
/// numeric sibling.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanSample {
/// Zero-based position of this row within the emitted sample list.
pub sample_index: usize,
/// Name-tag offset copied from the embedded name row.
pub name_tag_relative_offset: usize,
/// Primary name copied from the embedded name row, when present.
#[serde(default)]
pub primary_name: Option<String>,
/// Secondary name copied from the embedded name row, when present.
#[serde(default)]
pub secondary_name: Option<String>,
/// Leading dword of the row's compact prefix.
pub prefix_leading_dword: u32,
/// `0x%08x` rendering of `prefix_leading_dword`.
pub prefix_leading_dword_hex: String,
/// Trailing word of the row's compact prefix.
pub prefix_trailing_word: u16,
/// `0x%04x` rendering of `prefix_trailing_word`.
pub prefix_trailing_word_hex: String,
/// Separator byte of the row's compact prefix.
pub prefix_separator_byte: u8,
/// `0x%02x` rendering of `prefix_separator_byte`.
pub prefix_separator_byte_hex: String,
/// `u16` read at `name_tag_relative_offset - 3`; `None` when that offset
/// underflows or the read fails.
#[serde(default)]
pub child_count_candidate: Option<u16>,
/// `0x%04x` rendering of `child_count_candidate`.
#[serde(default)]
pub child_count_candidate_hex: Option<String>,
/// `u8` read at `name_tag_relative_offset - 1`; `None` when the offset
/// underflows or the read fails.
#[serde(default)]
pub saved_primary_child_byte_candidate: Option<u8>,
/// `0x%02x` rendering of `saved_primary_child_byte_candidate`.
#[serde(default)]
pub saved_primary_child_byte_candidate_hex: Option<String>,
}
/// Row count for one `(primary_name, secondary_name)` pair inside the dominant
/// post-profile span class.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanNamePairSummary {
/// Primary name of the pair, when the rows carried one.
#[serde(default)]
pub primary_name: Option<String>,
/// Secondary name of the pair, when the rows carried one.
#[serde(default)]
pub secondary_name: Option<String>,
/// Number of dominant-class rows with exactly this name pair.
pub count: usize,
}
/// Row count for one compact prefix `(leading dword, trailing word, separator
/// byte)` inside the dominant post-profile span class.
///
/// Every `*_hex` field is the lower-case, zero-padded `0x…` rendering of its
/// numeric sibling.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanPrefixSummary {
/// Leading dword of the prefix.
pub prefix_leading_dword: u32,
/// `0x%08x` rendering of `prefix_leading_dword`.
pub prefix_leading_dword_hex: String,
/// Trailing word of the prefix.
pub prefix_trailing_word: u16,
/// `0x%04x` rendering of `prefix_trailing_word`.
pub prefix_trailing_word_hex: String,
/// Separator byte of the prefix.
pub prefix_separator_byte: u8,
/// `0x%02x` rendering of `prefix_separator_byte`.
pub prefix_separator_byte_hex: String,
/// Number of dominant-class rows with exactly this prefix.
pub count: usize,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SmpSavePlacedStructureDynamicSideBufferFixedPolicySummary { pub struct SmpSavePlacedStructureDynamicSideBufferFixedPolicySummary {
pub row_count_with_0x1a_policy_chunk: usize, pub row_count_with_0x1a_policy_chunk: usize,
@ -4314,6 +4400,90 @@ fn build_infrastructure_asset_trace_report(
.map(|summary| summary.dominant_trailing_word_count) .map(|summary| summary.dominant_trailing_word_count)
.unwrap_or_default() .unwrap_or_default()
), ),
side_buffer
.and_then(|probe| probe.payload_envelope_summary.as_ref())
.and_then(|summary| summary.dominant_profile_span_class_summary.as_ref())
.map(|summary| {
format!(
"the dominant 0x{:x}-byte post-profile class is now narrowed too: dominant name pair is {:?}/{:?} x{}, dominant compact prefix is {}/{}/{} x{}, and dominant prelude candidate is {}/{} x{} across {} rows",
summary.profile_chunk_len_to_next_name_or_end,
summary.dominant_primary_name,
summary.dominant_secondary_name,
summary.dominant_name_pair_count,
summary
.dominant_prefix_leading_dword_hex
.as_deref()
.unwrap_or("0x00000000"),
summary
.dominant_prefix_trailing_word_hex
.as_deref()
.unwrap_or("0x0000"),
summary
.dominant_prefix_separator_byte_hex
.as_deref()
.unwrap_or("0x00"),
summary.dominant_prefix_count,
summary
.dominant_candidate_pattern
.as_ref()
.map(|pattern| pattern.child_count_candidate_hex.as_str())
.unwrap_or("0x0000"),
summary
.dominant_candidate_pattern
.as_ref()
.map(|pattern| pattern.saved_primary_child_byte_candidate_hex.as_str())
.unwrap_or("0x00"),
summary
.dominant_candidate_pattern
.as_ref()
.map(|pattern| pattern.count)
.unwrap_or_default(),
summary.row_count
)
})
.unwrap_or_else(|| {
"no dominant post-profile class summary was available for the embedded 0x55f3 spans".to_string()
}),
side_buffer
.and_then(|probe| probe.payload_envelope_summary.as_ref())
.and_then(|summary| summary.dominant_profile_span_class_summary.as_ref())
.map(|summary| {
format!(
"the dominant post-profile outliers are now explicit too: name-pair counts={:?}, compact-prefix counts={:?}, candidate-pattern counts={:?}",
summary
.name_pair_summaries
.iter()
.map(|entry| format!(
"{:?}/{:?}:{}",
entry.primary_name, entry.secondary_name, entry.count
))
.collect::<Vec<_>>(),
summary
.compact_prefix_pattern_summaries
.iter()
.map(|entry| format!(
"{}/{}/{}:{}",
entry.prefix_leading_dword_hex,
entry.prefix_trailing_word_hex,
entry.prefix_separator_byte_hex,
entry.count
))
.collect::<Vec<_>>(),
summary
.candidate_pattern_summaries
.iter()
.map(|entry| format!(
"{}/{}:{}",
entry.child_count_candidate_hex,
entry.saved_primary_child_byte_candidate_hex,
entry.count
))
.collect::<Vec<_>>()
)
})
.unwrap_or_else(|| {
"no dominant post-profile outlier breakdown was available".to_string()
}),
side_buffer side_buffer
.and_then(|probe| probe.payload_envelope_summary.as_ref()) .and_then(|probe| probe.payload_envelope_summary.as_ref())
.and_then(|summary| summary.name_prelude_candidate_summary.as_ref()) .and_then(|summary| summary.name_prelude_candidate_summary.as_ref())
@ -13140,6 +13310,256 @@ fn parse_save_placed_structure_dynamic_side_buffer_probe(
) )
.collect(), .collect(),
}); });
// Classify the rows that share the dominant post-profile span length: count
// their name pairs, compact prefixes and prelude candidate patterns, pick the
// most frequent of each, and keep a capped sample/breakdown for reporting.
// Yields `None` when no dominant span length was determined.
let dominant_profile_span_class_summary = dominant_profile_chunk_len
.map(|(dominant_profile_span_len, _)| {
// Name rows and envelope rows are paired positionally.
// NOTE(review): assumes both vectors are index-aligned — confirm at the
// site where they are built.
let dominant_rows = embedded_name_rows
.iter()
.zip(payload_envelope_rows.iter())
.filter_map(|(name_row, envelope_row)| {
// Keep only rows whose span length matches the dominant one.
(envelope_row.profile_chunk_len_to_next_name_or_end
== Some(dominant_profile_span_len))
.then(|| {
// The candidate prelude is the three bytes directly before the
// name tag: a u16 followed by a u8. `checked_sub` yields `None`
// instead of underflowing when the tag sits within the first
// three bytes.
let candidate_offset =
name_row.name_tag_relative_offset.checked_sub(3);
let child_count_candidate = candidate_offset
.and_then(|offset| read_u16_at(records_payload, offset));
let saved_primary_child_byte_candidate = candidate_offset
.and_then(|offset| read_u8_at(records_payload, offset + 2));
(
name_row.name_tag_relative_offset,
name_row.primary_name.clone(),
name_row.secondary_name.clone(),
name_row.prefix_leading_dword,
name_row.prefix_trailing_word,
name_row.prefix_separator_byte,
child_count_candidate,
saved_primary_child_byte_candidate,
)
})
})
.collect::<Vec<_>>();
// Ordered maps keep the later summaries deterministic (ascending key order).
let mut dominant_name_pair_counts =
BTreeMap::<(Option<String>, Option<String>), usize>::new();
let mut dominant_prefix_counts = BTreeMap::<(u32, u16, u8), usize>::new();
let mut dominant_candidate_pattern_counts = BTreeMap::<(u16, u8), usize>::new();
for (
_,
primary_name,
secondary_name,
prefix_leading_dword,
prefix_trailing_word,
prefix_separator_byte,
child_count_candidate,
saved_primary_child_byte_candidate,
) in &dominant_rows
{
*dominant_name_pair_counts
.entry((primary_name.clone(), secondary_name.clone()))
.or_default() += 1;
*dominant_prefix_counts
.entry((
*prefix_leading_dword,
*prefix_trailing_word,
*prefix_separator_byte,
))
.or_default() += 1;
// A candidate pattern is counted only when both reads succeeded, so the
// candidate counts may sum to less than `dominant_rows.len()`.
if let (Some(child_count_candidate), Some(saved_primary_child_byte_candidate)) =
(child_count_candidate, saved_primary_child_byte_candidate)
{
*dominant_candidate_pattern_counts
.entry((*child_count_candidate, *saved_primary_child_byte_candidate))
.or_default() += 1;
}
}
// Winner selection (same rule for all three maps): highest count wins; on
// equal counts the reversed key comparison makes the smallest key win.
let dominant_name_pair = dominant_name_pair_counts
.iter()
.max_by(|(left_key, left_count), (right_key, right_count)| {
left_count
.cmp(right_count)
.then_with(|| right_key.cmp(left_key))
})
.map(|((primary_name, secondary_name), count)| {
(primary_name.clone(), secondary_name.clone(), *count)
});
let dominant_prefix = dominant_prefix_counts
.iter()
.max_by(|(left_key, left_count), (right_key, right_count)| {
left_count
.cmp(right_count)
.then_with(|| right_key.cmp(left_key))
})
.map(
|((prefix_leading_dword, prefix_trailing_word, prefix_separator_byte), count)| {
(
*prefix_leading_dword,
*prefix_trailing_word,
*prefix_separator_byte,
*count,
)
},
);
let dominant_candidate_pattern = dominant_candidate_pattern_counts
.iter()
.max_by(|(left_key, left_count), (right_key, right_count)| {
left_count
.cmp(right_count)
.then_with(|| right_key.cmp(left_key))
})
.map(
|((child_count_candidate, saved_primary_child_byte_candidate), count)| {
SmpSavePlacedStructureDynamicSideBufferNamePreludeCandidatePattern {
child_count_candidate: *child_count_candidate,
child_count_candidate_hex: format!(
"0x{child_count_candidate:04x}"
),
saved_primary_child_byte_candidate:
*saved_primary_child_byte_candidate,
saved_primary_child_byte_candidate_hex: format!(
"0x{saved_primary_child_byte_candidate:02x}"
),
count: *count,
}
},
);
// Breakdown lists: the first 8 entries in ascending key order. They are not
// sorted by count, so with more than 8 distinct keys the most frequent
// entry is not guaranteed to be included.
let name_pair_summaries = dominant_name_pair_counts
.iter()
.map(|((primary_name, secondary_name), count)| {
SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanNamePairSummary {
primary_name: primary_name.clone(),
secondary_name: secondary_name.clone(),
count: *count,
}
})
.take(8)
.collect::<Vec<_>>();
let compact_prefix_pattern_summaries = dominant_prefix_counts
.iter()
.map(
|((prefix_leading_dword, prefix_trailing_word, prefix_separator_byte), count)| {
SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanPrefixSummary {
prefix_leading_dword: *prefix_leading_dword,
prefix_leading_dword_hex: format!(
"0x{prefix_leading_dword:08x}"
),
prefix_trailing_word: *prefix_trailing_word,
prefix_trailing_word_hex: format!(
"0x{prefix_trailing_word:04x}"
),
prefix_separator_byte: *prefix_separator_byte,
prefix_separator_byte_hex: format!(
"0x{prefix_separator_byte:02x}"
),
count: *count,
}
},
)
.take(8)
.collect::<Vec<_>>();
let candidate_pattern_summaries = dominant_candidate_pattern_counts
.iter()
.map(
|((child_count_candidate, saved_primary_child_byte_candidate), count)| {
SmpSavePlacedStructureDynamicSideBufferNamePreludeCandidatePattern {
child_count_candidate: *child_count_candidate,
child_count_candidate_hex: format!(
"0x{child_count_candidate:04x}"
),
saved_primary_child_byte_candidate:
*saved_primary_child_byte_candidate,
saved_primary_child_byte_candidate_hex: format!(
"0x{saved_primary_child_byte_candidate:02x}"
),
count: *count,
}
},
)
.take(8)
.collect::<Vec<_>>();
SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanClassSummary {
profile_chunk_len_to_next_name_or_end: dominant_profile_span_len,
row_count: dominant_rows.len(),
unique_name_pair_count: dominant_name_pair_counts.len(),
unique_compact_prefix_pattern_count: dominant_prefix_counts.len(),
dominant_candidate_pattern,
// The two name fields borrow the winner; the count field below consumes it.
dominant_primary_name: dominant_name_pair
.as_ref()
.and_then(|(primary_name, _, _)| primary_name.clone()),
dominant_secondary_name: dominant_name_pair
.as_ref()
.and_then(|(_, secondary_name, _)| secondary_name.clone()),
dominant_name_pair_count: dominant_name_pair
.map(|(_, _, count)| count)
.unwrap_or_default(),
// `dominant_prefix` is an `Option` of a `Copy` tuple, so it can be mapped
// repeatedly without `as_ref`.
dominant_prefix_leading_dword: dominant_prefix
.map(|(prefix_leading_dword, _, _, _)| prefix_leading_dword),
dominant_prefix_leading_dword_hex: dominant_prefix.map(
|(prefix_leading_dword, _, _, _)| format!("0x{prefix_leading_dword:08x}"),
),
dominant_prefix_trailing_word: dominant_prefix
.map(|(_, prefix_trailing_word, _, _)| prefix_trailing_word),
dominant_prefix_trailing_word_hex: dominant_prefix.map(
|(_, prefix_trailing_word, _, _)| format!("0x{prefix_trailing_word:04x}"),
),
dominant_prefix_separator_byte: dominant_prefix
.map(|(_, _, prefix_separator_byte, _)| prefix_separator_byte),
dominant_prefix_separator_byte_hex: dominant_prefix.map(
|(_, _, prefix_separator_byte, _)| format!("0x{prefix_separator_byte:02x}"),
),
dominant_prefix_count: dominant_prefix
.map(|(_, _, _, count)| count)
.unwrap_or_default(),
// Sample the first 8 dominant rows in encounter order; `sample_index` is
// the position within this truncated list, not within all rows.
sample_rows: dominant_rows
.iter()
.take(8)
.enumerate()
.map(
|(
sample_index,
(
name_tag_relative_offset,
primary_name,
secondary_name,
prefix_leading_dword,
prefix_trailing_word,
prefix_separator_byte,
child_count_candidate,
saved_primary_child_byte_candidate,
),
)| {
SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanSample {
sample_index,
name_tag_relative_offset: *name_tag_relative_offset,
primary_name: primary_name.clone(),
secondary_name: secondary_name.clone(),
prefix_leading_dword: *prefix_leading_dword,
prefix_leading_dword_hex: format!(
"0x{prefix_leading_dword:08x}"
),
prefix_trailing_word: *prefix_trailing_word,
prefix_trailing_word_hex: format!(
"0x{prefix_trailing_word:04x}"
),
prefix_separator_byte: *prefix_separator_byte,
prefix_separator_byte_hex: format!(
"0x{prefix_separator_byte:02x}"
),
child_count_candidate: *child_count_candidate,
child_count_candidate_hex: child_count_candidate
.map(|value| format!("0x{value:04x}")),
saved_primary_child_byte_candidate:
*saved_primary_child_byte_candidate,
saved_primary_child_byte_candidate_hex:
saved_primary_child_byte_candidate
.map(|value| format!("0x{value:02x}")),
}
},
)
.collect(),
name_pair_summaries,
compact_prefix_pattern_summaries,
candidate_pattern_summaries,
}
});
let payload_envelope_summary = Some( let payload_envelope_summary = Some(
SmpSavePlacedStructureDynamicSideBufferPayloadEnvelopeSummary { SmpSavePlacedStructureDynamicSideBufferPayloadEnvelopeSummary {
row_count_with_policy_tag_before_next_name, row_count_with_policy_tag_before_next_name,
@ -13159,6 +13579,7 @@ fn parse_save_placed_structure_dynamic_side_buffer_probe(
short_profile_flag_pair_summary: short_profile_flag_pair_summary.clone(), short_profile_flag_pair_summary: short_profile_flag_pair_summary.clone(),
fixed_policy_summary: fixed_policy_summary.clone(), fixed_policy_summary: fixed_policy_summary.clone(),
name_prelude_candidate_summary: name_prelude_candidate_summary.clone(), name_prelude_candidate_summary: name_prelude_candidate_summary.clone(),
dominant_profile_span_class_summary: dominant_profile_span_class_summary.clone(),
sample_rows: payload_envelope_rows sample_rows: payload_envelope_rows
.iter() .iter()
.take(8) .take(8)
@ -23666,6 +24087,7 @@ mod tests {
sample_rows: Vec::new(), sample_rows: Vec::new(),
}, },
), ),
dominant_profile_span_class_summary: None,
sample_rows: Vec::new(), sample_rows: Vec::new(),
}, },
), ),

View file

@ -2980,6 +2980,18 @@ The low helper strip beneath that shared family is tighter now too: `0x0052ecd0`
while the zero-length class is a separate `0x0055 / 0x00` outlier across `18/18` rows and the while the zero-length class is a separate `0x0055 / 0x00` outlier across `18/18` rows and the
`0x06` class is the only large mixed frontier left. So the next infrastructure pass should focus `0x06` class is the only large mixed frontier left. So the next infrastructure pass should focus
on classifying the mixed `0x06` rows instead of re-proving the pure-prelude `0x03` class. on classifying the mixed `0x06` rows instead of re-proving the pure-prelude `0x03` class.
That `0x06` class is now narrower too: grounded `q.gms` shows the dominant short-span family as
`BridgeSTWood_Section.3dp / Infrastructure` with compact prefix `0xff000000 / 0x0001 / 0xff`
across `62/72` rows and dominant prelude candidate `0x0001 / 0xff` across `63/72` rows. So the
next infrastructure pass should stop treating all short rows as equally ambiguous and focus on
the smaller outlier families inside that class, especially the `BallastCap`-style zero-like
rows and any remaining non-`0x0001 / 0xff` prelude candidates.
Those outliers are explicit now too: the remaining `10` short-span rows on grounded `q.gms`
break into `9` `BallastCapST_Cap.3dp / Infrastructure` rows with compact prefix
`0xf3010100 / 0x0055 / 0x00` and candidate pattern `0x0055 / 0x00`, plus `1`
`TrackCapST_Cap.3dp / Infrastructure` row with compact prefix `0xff0000ff / 0x0001 / 0xff`.
So the next infrastructure pass should target the `BallastCap` outlier family first instead of
spending time on the already-dominant bridge-section class.
The child loader family is explicit now too: local `.rdata` at `0x005cfd00` proves the The child loader family is explicit now too: local `.rdata` at `0x005cfd00` proves the
`Infrastructure` child vtable uses the shared tagged callback strip directly, with `Infrastructure` child vtable uses the shared tagged callback strip directly, with
`+0x40 = 0x00455fc0`, `+0x48 = 0x00455870`, and `+0x4c = 0x00455930`. So the remaining `+0x40 = 0x00455fc0`, `+0x48 = 0x00455870`, and `+0x4c = 0x00455930`. So the remaining

View file

@ -131,6 +131,18 @@ Working rule:
with dominant pattern `0x0055 / 0x00` across `18/18` rows and the `0x06` class remains the only with dominant pattern `0x0055 / 0x00` across `18/18` rows and the `0x06` class remains the only
large mixed frontier. So the next infrastructure slice should focus on classifying the mixed large mixed frontier. So the next infrastructure slice should focus on classifying the mixed
`0x06` rows, not on rediscovering the already-grounded pure-prelude `0x03` rows. `0x06` rows, not on rediscovering the already-grounded pure-prelude `0x03` rows.
- That `0x06` class is now narrower too: grounded `q.gms` shows the dominant short-span class as
`BridgeSTWood_Section.3dp / Infrastructure` with compact prefix `0xff000000 / 0x0001 / 0xff`
across `62/72` rows and dominant prelude candidate `0x0001 / 0xff` across `63/72` rows. So the
next infrastructure slice should stop treating the `0x06` class as uniformly ambiguous and focus
on the smaller outlier families inside that class, especially the zero-like `BallastCap`-style
rows and any remaining non-`0x0001 / 0xff` prelude candidates.
- Those outliers are explicit now too: the remaining `10` short-span rows on grounded `q.gms`
break into `9` `BallastCapST_Cap.3dp / Infrastructure` rows with compact prefix
`0xf3010100 / 0x0055 / 0x00` and candidate pattern `0x0055 / 0x00`, plus `1`
`TrackCapST_Cap.3dp / Infrastructure` row with compact prefix `0xff0000ff / 0x0001 / 0xff`.
So the next infrastructure slice should target the `BallastCap` outlier family first, not the
already-dominant bridge-section class.
- Reconstruct the save-side region record body on top of the newly corrected non-direct tagged - Reconstruct the save-side region record body on top of the newly corrected non-direct tagged
region seam (`0x5209/0x520a/0x520b`, stride hint `0x06`, `Marker09` record stems) now that the region seam (`0x5209/0x520a/0x520b`, stride hint `0x06`, `Marker09` record stems) now that the
`0x55f3` payload is known to be fully consumed by the embedded profile collection on grounded `0x55f3` payload is known to be fully consumed by the embedded profile collection on grounded