Classify infrastructure short-span outlier families

This commit is contained in:
Jan Petykiewicz 2026-04-18 14:48:00 -07:00
commit 1a0653cff1
3 changed files with 446 additions and 0 deletions

View file

@ -1953,9 +1953,95 @@ pub struct SmpSavePlacedStructureDynamicSideBufferPayloadEnvelopeSummary {
pub name_prelude_candidate_summary:
Option<SmpSavePlacedStructureDynamicSideBufferNamePreludeCandidateSummary>,
#[serde(default)]
pub dominant_profile_span_class_summary:
Option<SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanClassSummary>,
#[serde(default)]
pub sample_rows: Vec<SmpSavePlacedStructureDynamicSideBufferPayloadEnvelopeSample>,
}
/// Breakdown of the embedded name rows that share one post-profile span length.
///
/// Built by `parse_save_placed_structure_dynamic_side_buffer_probe` from the rows whose
/// payload-envelope `profile_chunk_len_to_next_name_or_end` equals the span length the probe
/// selected as dominant. Every "dominant" value below is the most frequent key among those
/// rows; when two keys have the same count, the key that sorts first wins.
///
/// Fields marked `#[serde(default)]` may be absent from serialized input, so reports written
/// before this summary existed still deserialize.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanClassSummary {
/// Span length shared by every row counted in this summary.
pub profile_chunk_len_to_next_name_or_end: usize,
/// Number of rows that matched the span length above.
pub row_count: usize,
/// Number of distinct `(primary_name, secondary_name)` pairs among the matched rows.
pub unique_name_pair_count: usize,
/// Number of distinct `(leading dword, trailing word, separator byte)` prefixes among the
/// matched rows.
pub unique_compact_prefix_pattern_count: usize,
/// Most frequent `(child count, saved primary child byte)` candidate pair. Only rows where
/// both candidate values could be read are counted; `None` when no row qualified.
#[serde(default)]
pub dominant_candidate_pattern:
Option<SmpSavePlacedStructureDynamicSideBufferNamePreludeCandidatePattern>,
/// Primary name of the most frequent name pair (may itself be absent on that pair).
#[serde(default)]
pub dominant_primary_name: Option<String>,
/// Secondary name of the most frequent name pair (may itself be absent on that pair).
#[serde(default)]
pub dominant_secondary_name: Option<String>,
/// Row count for the most frequent name pair; `0` when there were no matched rows.
pub dominant_name_pair_count: usize,
/// Leading dword of the most frequent compact prefix.
#[serde(default)]
pub dominant_prefix_leading_dword: Option<u32>,
/// Same value rendered as `0x` followed by 8 lowercase hex digits.
#[serde(default)]
pub dominant_prefix_leading_dword_hex: Option<String>,
/// Trailing word of the most frequent compact prefix.
#[serde(default)]
pub dominant_prefix_trailing_word: Option<u16>,
/// Same value rendered as `0x` followed by 4 lowercase hex digits.
#[serde(default)]
pub dominant_prefix_trailing_word_hex: Option<String>,
/// Separator byte of the most frequent compact prefix.
#[serde(default)]
pub dominant_prefix_separator_byte: Option<u8>,
/// Same value rendered as `0x` followed by 2 lowercase hex digits.
#[serde(default)]
pub dominant_prefix_separator_byte_hex: Option<String>,
/// Row count for the most frequent compact prefix; `0` when there were no matched rows.
pub dominant_prefix_count: usize,
/// The first matched rows in encounter order, capped at 8.
#[serde(default)]
pub sample_rows: Vec<SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanSample>,
/// Per-name-pair row counts, capped at 8 entries.
/// NOTE(review): the cap is applied in key order, not by descending count, so with more
/// than 8 distinct pairs the dominant pair is not guaranteed to be listed.
#[serde(default)]
pub name_pair_summaries: Vec<SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanNamePairSummary>,
/// Per-prefix row counts, capped at 8 entries in key order (same caveat as above).
#[serde(default)]
pub compact_prefix_pattern_summaries:
Vec<SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanPrefixSummary>,
/// Per-candidate-pattern row counts, capped at 8 entries in key order (same caveat as
/// above).
#[serde(default)]
pub candidate_pattern_summaries:
Vec<SmpSavePlacedStructureDynamicSideBufferNamePreludeCandidatePattern>,
}
/// One example row from the dominant post-profile span class.
///
/// The names, offset and prefix values are copied from the embedded name row; the two
/// `*_candidate` values are read from the records payload just before the name tag.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanSample {
/// Zero-based position of this row within the emitted sample list.
pub sample_index: usize,
/// Offset of the row's name tag, as recorded on the embedded name row.
pub name_tag_relative_offset: usize,
/// Primary name recorded on the embedded name row, if any.
#[serde(default)]
pub primary_name: Option<String>,
/// Secondary name recorded on the embedded name row, if any.
#[serde(default)]
pub secondary_name: Option<String>,
/// Leading dword of the row's compact prefix.
pub prefix_leading_dword: u32,
/// Same value rendered as `0x` followed by 8 lowercase hex digits.
pub prefix_leading_dword_hex: String,
/// Trailing word of the row's compact prefix.
pub prefix_trailing_word: u16,
/// Same value rendered as `0x` followed by 4 lowercase hex digits.
pub prefix_trailing_word_hex: String,
/// Separator byte of the row's compact prefix.
pub prefix_separator_byte: u8,
/// Same value rendered as `0x` followed by 2 lowercase hex digits.
pub prefix_separator_byte_hex: String,
/// `u16` read at `name_tag_relative_offset - 3`; `None` when that offset would underflow
/// or the read fails.
#[serde(default)]
pub child_count_candidate: Option<u16>,
/// Same value rendered as `0x` followed by 4 lowercase hex digits.
#[serde(default)]
pub child_count_candidate_hex: Option<String>,
/// `u8` read at `name_tag_relative_offset - 1` (the byte after the child-count
/// candidate); `None` when the offset would underflow or the read fails.
#[serde(default)]
pub saved_primary_child_byte_candidate: Option<u8>,
/// Same value rendered as `0x` followed by 2 lowercase hex digits.
#[serde(default)]
pub saved_primary_child_byte_candidate_hex: Option<String>,
}
/// Row count for one distinct `(primary_name, secondary_name)` pair inside the dominant
/// post-profile span class.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanNamePairSummary {
/// Primary name of the pair; `None` when the rows carried no primary name.
#[serde(default)]
pub primary_name: Option<String>,
/// Secondary name of the pair; `None` when the rows carried no secondary name.
#[serde(default)]
pub secondary_name: Option<String>,
/// Number of rows in the class that carry exactly this name pair.
pub count: usize,
}
/// Row count for one distinct compact prefix `(leading dword, trailing word, separator byte)`
/// inside the dominant post-profile span class.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanPrefixSummary {
/// Leading dword of the prefix.
pub prefix_leading_dword: u32,
/// Same value rendered as `0x` followed by 8 lowercase hex digits.
pub prefix_leading_dword_hex: String,
/// Trailing word of the prefix.
pub prefix_trailing_word: u16,
/// Same value rendered as `0x` followed by 4 lowercase hex digits.
pub prefix_trailing_word_hex: String,
/// Separator byte of the prefix.
pub prefix_separator_byte: u8,
/// Same value rendered as `0x` followed by 2 lowercase hex digits.
pub prefix_separator_byte_hex: String,
/// Number of rows in the class that carry exactly this prefix.
pub count: usize,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SmpSavePlacedStructureDynamicSideBufferFixedPolicySummary {
pub row_count_with_0x1a_policy_chunk: usize,
@ -4314,6 +4400,90 @@ fn build_infrastructure_asset_trace_report(
.map(|summary| summary.dominant_trailing_word_count)
.unwrap_or_default()
),
side_buffer
.and_then(|probe| probe.payload_envelope_summary.as_ref())
.and_then(|summary| summary.dominant_profile_span_class_summary.as_ref())
.map(|summary| {
format!(
"the dominant 0x{:x}-byte post-profile class is now narrowed too: dominant name pair is {:?}/{:?} x{}, dominant compact prefix is {}/{}/{} x{}, and dominant prelude candidate is {}/{} x{} across {} rows",
summary.profile_chunk_len_to_next_name_or_end,
summary.dominant_primary_name,
summary.dominant_secondary_name,
summary.dominant_name_pair_count,
summary
.dominant_prefix_leading_dword_hex
.as_deref()
.unwrap_or("0x00000000"),
summary
.dominant_prefix_trailing_word_hex
.as_deref()
.unwrap_or("0x0000"),
summary
.dominant_prefix_separator_byte_hex
.as_deref()
.unwrap_or("0x00"),
summary.dominant_prefix_count,
summary
.dominant_candidate_pattern
.as_ref()
.map(|pattern| pattern.child_count_candidate_hex.as_str())
.unwrap_or("0x0000"),
summary
.dominant_candidate_pattern
.as_ref()
.map(|pattern| pattern.saved_primary_child_byte_candidate_hex.as_str())
.unwrap_or("0x00"),
summary
.dominant_candidate_pattern
.as_ref()
.map(|pattern| pattern.count)
.unwrap_or_default(),
summary.row_count
)
})
.unwrap_or_else(|| {
"no dominant post-profile class summary was available for the embedded 0x55f3 spans".to_string()
}),
side_buffer
.and_then(|probe| probe.payload_envelope_summary.as_ref())
.and_then(|summary| summary.dominant_profile_span_class_summary.as_ref())
.map(|summary| {
format!(
"the dominant post-profile outliers are now explicit too: name-pair counts={:?}, compact-prefix counts={:?}, candidate-pattern counts={:?}",
summary
.name_pair_summaries
.iter()
.map(|entry| format!(
"{:?}/{:?}:{}",
entry.primary_name, entry.secondary_name, entry.count
))
.collect::<Vec<_>>(),
summary
.compact_prefix_pattern_summaries
.iter()
.map(|entry| format!(
"{}/{}/{}:{}",
entry.prefix_leading_dword_hex,
entry.prefix_trailing_word_hex,
entry.prefix_separator_byte_hex,
entry.count
))
.collect::<Vec<_>>(),
summary
.candidate_pattern_summaries
.iter()
.map(|entry| format!(
"{}/{}:{}",
entry.child_count_candidate_hex,
entry.saved_primary_child_byte_candidate_hex,
entry.count
))
.collect::<Vec<_>>()
)
})
.unwrap_or_else(|| {
"no dominant post-profile outlier breakdown was available".to_string()
}),
side_buffer
.and_then(|probe| probe.payload_envelope_summary.as_ref())
.and_then(|summary| summary.name_prelude_candidate_summary.as_ref())
@ -13140,6 +13310,256 @@ fn parse_save_placed_structure_dynamic_side_buffer_probe(
)
.collect(),
});
let dominant_profile_span_class_summary = dominant_profile_chunk_len
.map(|(dominant_profile_span_len, _)| {
let dominant_rows = embedded_name_rows
.iter()
.zip(payload_envelope_rows.iter())
.filter_map(|(name_row, envelope_row)| {
(envelope_row.profile_chunk_len_to_next_name_or_end
== Some(dominant_profile_span_len))
.then(|| {
let candidate_offset =
name_row.name_tag_relative_offset.checked_sub(3);
let child_count_candidate = candidate_offset
.and_then(|offset| read_u16_at(records_payload, offset));
let saved_primary_child_byte_candidate = candidate_offset
.and_then(|offset| read_u8_at(records_payload, offset + 2));
(
name_row.name_tag_relative_offset,
name_row.primary_name.clone(),
name_row.secondary_name.clone(),
name_row.prefix_leading_dword,
name_row.prefix_trailing_word,
name_row.prefix_separator_byte,
child_count_candidate,
saved_primary_child_byte_candidate,
)
})
})
.collect::<Vec<_>>();
let mut dominant_name_pair_counts =
BTreeMap::<(Option<String>, Option<String>), usize>::new();
let mut dominant_prefix_counts = BTreeMap::<(u32, u16, u8), usize>::new();
let mut dominant_candidate_pattern_counts = BTreeMap::<(u16, u8), usize>::new();
for (
_,
primary_name,
secondary_name,
prefix_leading_dword,
prefix_trailing_word,
prefix_separator_byte,
child_count_candidate,
saved_primary_child_byte_candidate,
) in &dominant_rows
{
*dominant_name_pair_counts
.entry((primary_name.clone(), secondary_name.clone()))
.or_default() += 1;
*dominant_prefix_counts
.entry((
*prefix_leading_dword,
*prefix_trailing_word,
*prefix_separator_byte,
))
.or_default() += 1;
if let (Some(child_count_candidate), Some(saved_primary_child_byte_candidate)) =
(child_count_candidate, saved_primary_child_byte_candidate)
{
*dominant_candidate_pattern_counts
.entry((*child_count_candidate, *saved_primary_child_byte_candidate))
.or_default() += 1;
}
}
let dominant_name_pair = dominant_name_pair_counts
.iter()
.max_by(|(left_key, left_count), (right_key, right_count)| {
left_count
.cmp(right_count)
.then_with(|| right_key.cmp(left_key))
})
.map(|((primary_name, secondary_name), count)| {
(primary_name.clone(), secondary_name.clone(), *count)
});
let dominant_prefix = dominant_prefix_counts
.iter()
.max_by(|(left_key, left_count), (right_key, right_count)| {
left_count
.cmp(right_count)
.then_with(|| right_key.cmp(left_key))
})
.map(
|((prefix_leading_dword, prefix_trailing_word, prefix_separator_byte), count)| {
(
*prefix_leading_dword,
*prefix_trailing_word,
*prefix_separator_byte,
*count,
)
},
);
let dominant_candidate_pattern = dominant_candidate_pattern_counts
.iter()
.max_by(|(left_key, left_count), (right_key, right_count)| {
left_count
.cmp(right_count)
.then_with(|| right_key.cmp(left_key))
})
.map(
|((child_count_candidate, saved_primary_child_byte_candidate), count)| {
SmpSavePlacedStructureDynamicSideBufferNamePreludeCandidatePattern {
child_count_candidate: *child_count_candidate,
child_count_candidate_hex: format!(
"0x{child_count_candidate:04x}"
),
saved_primary_child_byte_candidate:
*saved_primary_child_byte_candidate,
saved_primary_child_byte_candidate_hex: format!(
"0x{saved_primary_child_byte_candidate:02x}"
),
count: *count,
}
},
);
let name_pair_summaries = dominant_name_pair_counts
.iter()
.map(|((primary_name, secondary_name), count)| {
SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanNamePairSummary {
primary_name: primary_name.clone(),
secondary_name: secondary_name.clone(),
count: *count,
}
})
.take(8)
.collect::<Vec<_>>();
let compact_prefix_pattern_summaries = dominant_prefix_counts
.iter()
.map(
|((prefix_leading_dword, prefix_trailing_word, prefix_separator_byte), count)| {
SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanPrefixSummary {
prefix_leading_dword: *prefix_leading_dword,
prefix_leading_dword_hex: format!(
"0x{prefix_leading_dword:08x}"
),
prefix_trailing_word: *prefix_trailing_word,
prefix_trailing_word_hex: format!(
"0x{prefix_trailing_word:04x}"
),
prefix_separator_byte: *prefix_separator_byte,
prefix_separator_byte_hex: format!(
"0x{prefix_separator_byte:02x}"
),
count: *count,
}
},
)
.take(8)
.collect::<Vec<_>>();
let candidate_pattern_summaries = dominant_candidate_pattern_counts
.iter()
.map(
|((child_count_candidate, saved_primary_child_byte_candidate), count)| {
SmpSavePlacedStructureDynamicSideBufferNamePreludeCandidatePattern {
child_count_candidate: *child_count_candidate,
child_count_candidate_hex: format!(
"0x{child_count_candidate:04x}"
),
saved_primary_child_byte_candidate:
*saved_primary_child_byte_candidate,
saved_primary_child_byte_candidate_hex: format!(
"0x{saved_primary_child_byte_candidate:02x}"
),
count: *count,
}
},
)
.take(8)
.collect::<Vec<_>>();
SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanClassSummary {
profile_chunk_len_to_next_name_or_end: dominant_profile_span_len,
row_count: dominant_rows.len(),
unique_name_pair_count: dominant_name_pair_counts.len(),
unique_compact_prefix_pattern_count: dominant_prefix_counts.len(),
dominant_candidate_pattern,
dominant_primary_name: dominant_name_pair
.as_ref()
.and_then(|(primary_name, _, _)| primary_name.clone()),
dominant_secondary_name: dominant_name_pair
.as_ref()
.and_then(|(_, secondary_name, _)| secondary_name.clone()),
dominant_name_pair_count: dominant_name_pair
.map(|(_, _, count)| count)
.unwrap_or_default(),
dominant_prefix_leading_dword: dominant_prefix
.map(|(prefix_leading_dword, _, _, _)| prefix_leading_dword),
dominant_prefix_leading_dword_hex: dominant_prefix.map(
|(prefix_leading_dword, _, _, _)| format!("0x{prefix_leading_dword:08x}"),
),
dominant_prefix_trailing_word: dominant_prefix
.map(|(_, prefix_trailing_word, _, _)| prefix_trailing_word),
dominant_prefix_trailing_word_hex: dominant_prefix.map(
|(_, prefix_trailing_word, _, _)| format!("0x{prefix_trailing_word:04x}"),
),
dominant_prefix_separator_byte: dominant_prefix
.map(|(_, _, prefix_separator_byte, _)| prefix_separator_byte),
dominant_prefix_separator_byte_hex: dominant_prefix.map(
|(_, _, prefix_separator_byte, _)| format!("0x{prefix_separator_byte:02x}"),
),
dominant_prefix_count: dominant_prefix
.map(|(_, _, _, count)| count)
.unwrap_or_default(),
sample_rows: dominant_rows
.iter()
.take(8)
.enumerate()
.map(
|(
sample_index,
(
name_tag_relative_offset,
primary_name,
secondary_name,
prefix_leading_dword,
prefix_trailing_word,
prefix_separator_byte,
child_count_candidate,
saved_primary_child_byte_candidate,
),
)| {
SmpSavePlacedStructureDynamicSideBufferDominantProfileSpanSample {
sample_index,
name_tag_relative_offset: *name_tag_relative_offset,
primary_name: primary_name.clone(),
secondary_name: secondary_name.clone(),
prefix_leading_dword: *prefix_leading_dword,
prefix_leading_dword_hex: format!(
"0x{prefix_leading_dword:08x}"
),
prefix_trailing_word: *prefix_trailing_word,
prefix_trailing_word_hex: format!(
"0x{prefix_trailing_word:04x}"
),
prefix_separator_byte: *prefix_separator_byte,
prefix_separator_byte_hex: format!(
"0x{prefix_separator_byte:02x}"
),
child_count_candidate: *child_count_candidate,
child_count_candidate_hex: child_count_candidate
.map(|value| format!("0x{value:04x}")),
saved_primary_child_byte_candidate:
*saved_primary_child_byte_candidate,
saved_primary_child_byte_candidate_hex:
saved_primary_child_byte_candidate
.map(|value| format!("0x{value:02x}")),
}
},
)
.collect(),
name_pair_summaries,
compact_prefix_pattern_summaries,
candidate_pattern_summaries,
}
});
let payload_envelope_summary = Some(
SmpSavePlacedStructureDynamicSideBufferPayloadEnvelopeSummary {
row_count_with_policy_tag_before_next_name,
@ -13159,6 +13579,7 @@ fn parse_save_placed_structure_dynamic_side_buffer_probe(
short_profile_flag_pair_summary: short_profile_flag_pair_summary.clone(),
fixed_policy_summary: fixed_policy_summary.clone(),
name_prelude_candidate_summary: name_prelude_candidate_summary.clone(),
dominant_profile_span_class_summary: dominant_profile_span_class_summary.clone(),
sample_rows: payload_envelope_rows
.iter()
.take(8)
@ -23666,6 +24087,7 @@ mod tests {
sample_rows: Vec::new(),
},
),
dominant_profile_span_class_summary: None,
sample_rows: Vec::new(),
},
),

View file

@ -2980,6 +2980,18 @@ The low helper strip beneath that shared family is tighter now too: `0x0052ecd0`
while the zero-length class is a separate `0x0055 / 0x00` outlier across `18/18` rows and the
`0x06` class is the only large mixed frontier left. So the next infrastructure pass should focus
on classifying the mixed `0x06` rows instead of re-proving the pure-prelude `0x03` class.
That `0x06` class is now narrower too: grounded `q.gms` shows the dominant short-span family as
`BridgeSTWood_Section.3dp / Infrastructure` with compact prefix `0xff000000 / 0x0001 / 0xff`
across `62/72` rows and dominant prelude candidate `0x0001 / 0xff` across `63/72` rows. So the
next infrastructure pass should stop treating all short rows as equally ambiguous and focus on
the smaller outlier families inside that class, especially the `BallastCap`-style zero-like
rows and any remaining non-`0x0001 / 0xff` prelude candidates.
Those outliers are explicit now too: the remaining `10` short-span rows on grounded `q.gms`
break into `9` `BallastCapST_Cap.3dp / Infrastructure` rows with compact prefix
`0xf3010100 / 0x0055 / 0x00` and candidate pattern `0x0055 / 0x00`, plus `1`
`TrackCapST_Cap.3dp / Infrastructure` row with compact prefix `0xff0000ff / 0x0001 / 0xff`.
So the next infrastructure pass should target the `BallastCap` outlier family first instead of
spending time on the already-dominant bridge-section class.
The child loader family is explicit now too: local `.rdata` at `0x005cfd00` proves the
`Infrastructure` child vtable uses the shared tagged callback strip directly, with
`+0x40 = 0x00455fc0`, `+0x48 = 0x00455870`, and `+0x4c = 0x00455930`. So the remaining

View file

@ -131,6 +131,18 @@ Working rule:
with dominant pattern `0x0055 / 0x00` across `18/18` rows and the `0x06` class remains the only
large mixed frontier. So the next infrastructure slice should focus on classifying the mixed
`0x06` rows, not on rediscovering the already-grounded pure-prelude `0x03` rows.
- That `0x06` class is now narrower too: grounded `q.gms` shows the dominant short-span class as
`BridgeSTWood_Section.3dp / Infrastructure` with compact prefix `0xff000000 / 0x0001 / 0xff`
across `62/72` rows and dominant prelude candidate `0x0001 / 0xff` across `63/72` rows. So the
next infrastructure slice should stop treating the `0x06` class as uniformly ambiguous and focus
on the smaller outlier families inside that class, especially the zero-like `BallastCap`-style
rows and any remaining non-`0x0001 / 0xff` prelude candidates.
- Those outliers are explicit now too: the remaining `10` short-span rows on grounded `q.gms`
break into `9` `BallastCapST_Cap.3dp / Infrastructure` rows with compact prefix
`0xf3010100 / 0x0055 / 0x00` and candidate pattern `0x0055 / 0x00`, plus `1`
`TrackCapST_Cap.3dp / Infrastructure` row with compact prefix `0xff0000ff / 0x0001 / 0xff`.
So the next infrastructure slice should target the `BallastCap` outlier family first, not the
already-dominant bridge-section class.
- Reconstruct the save-side region record body on top of the newly corrected non-direct tagged
region seam (`0x5209/0x520a/0x520b`, stride hint `0x06`, `Marker09` record stems) now that the
`0x55f3` payload is known to be fully consumed by the embedded profile collection on grounded