From 22040d9432d6535f2a92b321cb591a473b0b8ffb Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Thu, 2 Apr 2026 20:22:15 -0700 Subject: [PATCH 1/2] performance work --- src/elements.rs | 598 +++++++++++++++++++++++++++++++++++++++++------- src/library.rs | 91 ++++++-- 2 files changed, 592 insertions(+), 97 deletions(-) diff --git a/src/elements.rs b/src/elements.rs index 4dd8969..f44059c 100644 --- a/src/elements.rs +++ b/src/elements.rs @@ -28,6 +28,68 @@ use arrow::array::{ pub type DListBuilder = ListBuilder>; pub type FListBuilder = FixedSizeListBuilder>; +struct PropertyRecord { + key: i16, + value: String, +} + +struct BoundaryRecord { + layer_id: u32, + vertices: Vec, + properties: Vec, +} + +struct BoundaryBatchRecord { + layer_id: u32, + vertices: Vec, + vertex_offsets: Vec, +} + +struct RectBatchRecord { + layer_id: u32, + rects: Vec, +} + +struct SRefRecord { + target_id: u32, + invert_y: bool, + scale: f64, + angle_rad: f64, + xy: u64, + properties: Vec, +} + +struct ARefRecord { + target_id: u32, + invert_y: bool, + scale: f64, + angle_rad: f64, + xy: u64, + xy0: u64, + xy1: u64, + counts: u32, + properties: Vec, +} + +enum RefRecord { + SRef(SRefRecord), + ARef(ARefRecord), +} + + +const UNIT_COUNTS: u32 = (1_u32 << 16) | 1_u32; + + +fn layer_id_for( + layer: i16, + dtype: i16, + layers: &mut HashMap, + ) -> u32 { + let layer32 = ((layer as u16 as u32) << 16) | (dtype as u16 as u32); + let next_id = layers.len(); + *layers.entry(layer32).or_insert(next_id.try_into().unwrap()) +} + fn insert_layer( layer: i16, @@ -36,11 +98,9 @@ fn insert_layer( struct_builder: &mut StructBuilder, field_index: usize, ) { - let layer32 = ((layer as u16 as u32) << 16) | (dtype as u16 as u32); - let next_id = layers.len(); - let id = layers.entry(layer32).or_insert(next_id.try_into().unwrap()); + let id = layer_id_for(layer, dtype, layers); let layer_builder = struct_builder.field_builder::(field_index).unwrap(); - layer_builder.append_value(*id); + layer_builder.append_value(id); } @@ -51,12 +111,25 @@ pub fn read_elements<'a>( layers: &mut HashMap, ) -> IResult<'a, ()> { let mut input = input; + let mut boundaries = Vec::::new(); + let mut srefs = Vec::::new(); + let mut arefs = Vec::::new(); let (_, mut header) = RecordHeader::read(input)?; // don't consume tag while header.tag != records::RTAG_ENDSTR { (input, _) = match header.tag { - records::RTAG_SREF => read_ref(input, cell_builder, header.tag, names)?, - records::RTAG_AREF => read_ref(input, cell_builder, header.tag, names)?, - records::RTAG_BOUNDARY => read_boundary(input, cell_builder, layers)?, + records::RTAG_SREF | records::RTAG_AREF => { + let (next_input, ref_record) = read_ref(input, header.tag, names)?; + match ref_record { + RefRecord::SRef(sref) => srefs.push(sref), + RefRecord::ARef(aref) => arefs.push(aref), + } + (next_input, ()) + }, + records::RTAG_BOUNDARY => { + let (next_input, boundary) = read_boundary(input, layers)?; + boundaries.push(boundary); + (next_input, ()) + }, records::RTAG_PATH => read_path(input, cell_builder, layers)?, records::RTAG_NODE => read_boxnode(input, cell_builder, header.tag, layers)?, records::RTAG_BOX => read_boxnode(input, cell_builder, header.tag, layers)?, @@ -71,50 +144,363 @@ pub fn read_elements<'a>( } (input, _) = take_bytes(input, 4_usize)?; // consume endstr tag - let refs_builder = cell_builder.field_builder::(2).unwrap(); - refs_builder.append(true); - let boundaries_builder = cell_builder.field_builder::(3).unwrap(); - boundaries_builder.append(true); - let paths_builder = cell_builder.field_builder::(4).unwrap(); + let mut sref_plain = Vec::::with_capacity(srefs.len() + arefs.len() / 4); + let mut sref_props = Vec::::with_capacity(srefs.len() / 16 + arefs.len() / 64); + for sref in srefs { + if sref.properties.is_empty() { + sref_plain.push(sref); + } else { + sref_props.push(sref); + } + } + + let mut aref_plain = Vec::::with_capacity(arefs.len()); + let mut aref_props = Vec::::with_capacity(arefs.len() / 16); + for aref in arefs { + if aref.counts == UNIT_COUNTS { + let sref = aref_to_sref(aref); + if sref.properties.is_empty() { + sref_plain.push(sref); + } else { + sref_props.push(sref); + } + } else if aref.properties.is_empty() { + aref_plain.push(aref); + } else { + aref_props.push(aref); + } + } + sref_plain.sort_by_key(|sref| sref.target_id); + aref_plain.sort_by_key(|aref| aref.target_id); + + let mut boundary_plain = Vec::::with_capacity(boundaries.len()); + let mut boundary_props = Vec::::with_capacity(boundaries.len() / 16); + for boundary in boundaries { + if boundary.properties.is_empty() { + boundary_plain.push(boundary); + } else { + boundary_props.push(boundary); + } + } + boundary_plain.sort_by_key(|boundary| boundary.layer_id); + + let mut rect_batches = Vec::::new(); + let mut boundary_batches = Vec::::new(); + let mut ii = 0; + while ii < boundary_plain.len() { + let start = ii; + let layer_id = boundary_plain[ii].layer_id; + while ii < boundary_plain.len() && boundary_plain[ii].layer_id == layer_id { + ii += 1; + } + + let group = &boundary_plain[start..ii]; + let rect_capacity = group.len() * 4; + let vertex_capacity = group.iter().map(|boundary| boundary.vertices.len()).sum(); + let mut rect_batch = RectBatchRecord { + layer_id, + rects: Vec::with_capacity(rect_capacity), + }; + let mut boundary_batch = BoundaryBatchRecord { + layer_id, + vertices: Vec::with_capacity(vertex_capacity), + vertex_offsets: Vec::with_capacity(group.len()), + }; + + let mut next_offset: u32 = 0; + for boundary in group { + if let Some(rect) = boundary_to_rect(&boundary.vertices) { + rect_batch.rects.extend_from_slice(&rect); + } else { + boundary_batch.vertex_offsets.push(next_offset); + boundary_batch.vertices.extend_from_slice(&boundary.vertices); + next_offset += (boundary.vertices.len() / 2) as u32; + } + } + if !rect_batch.rects.is_empty() { + rect_batches.push(rect_batch); + } + if !boundary_batch.vertex_offsets.is_empty() { + boundary_batches.push(boundary_batch); + } + } + + let sref_builder = cell_builder.field_builder::(2).unwrap(); + for sref in sref_plain { + append_sref(sref_builder, sref); + } + sref_builder.append(true); + + let aref_builder = cell_builder.field_builder::(3).unwrap(); + for aref in aref_plain { + append_aref(aref_builder, aref); + } + aref_builder.append(true); + + let sref_props_builder = cell_builder.field_builder::(4).unwrap(); + for sref in sref_props { + append_sref_prop(sref_props_builder, sref); + } + sref_props_builder.append(true); + + let aref_props_builder = cell_builder.field_builder::(5).unwrap(); + for aref in aref_props { + append_aref_prop(aref_props_builder, aref); + } + aref_props_builder.append(true); + + let rect_batches_builder = cell_builder.field_builder::(6).unwrap(); + for rect_batch in rect_batches { + append_rect_batch(rect_batches_builder, rect_batch); + } + rect_batches_builder.append(true); + + let boundary_batches_builder = cell_builder.field_builder::(7).unwrap(); + for boundary_batch in boundary_batches { + append_boundary_batch(boundary_batches_builder, boundary_batch); + } + boundary_batches_builder.append(true); + + let boundary_props_builder = cell_builder.field_builder::(8).unwrap(); + for boundary in boundary_props { + append_boundary_prop(boundary_props_builder, boundary); + } + boundary_props_builder.append(true); + + let paths_builder = cell_builder.field_builder::(9).unwrap(); paths_builder.append(true); - let nodes_builder = cell_builder.field_builder::(5).unwrap(); + let nodes_builder = cell_builder.field_builder::(10).unwrap(); nodes_builder.append(true); - let boxes_builder = cell_builder.field_builder::(6).unwrap(); + let boxes_builder = cell_builder.field_builder::(11).unwrap(); boxes_builder.append(true); - let texts_builder = cell_builder.field_builder::(7).unwrap(); + let texts_builder = cell_builder.field_builder::(12).unwrap(); texts_builder.append(true); Ok((input, ())) } -pub fn read_boundary<'a>( - input: &'a [u8], - cell_builder: &mut StructBuilder, - layers: &mut HashMap, - ) -> IResult<'a, ()> { - let boundaries_builder = cell_builder.field_builder::(3).unwrap(); - let boundary_builder = boundaries_builder.values().as_any_mut().downcast_mut::().unwrap(); +fn append_sref( + sref_builder: &mut DListBuilder, + sref: SRefRecord, + ) { + let sref_struct_builder = sref_builder.values().as_any_mut().downcast_mut::().unwrap(); + let target_builder = sref_struct_builder.field_builder::(0).unwrap(); + target_builder.append_value(sref.target_id); + let invert_builder = sref_struct_builder.field_builder::(1).unwrap(); + invert_builder.append_value(sref.invert_y); + let scale_builder = sref_struct_builder.field_builder::(2).unwrap(); + scale_builder.append_value(sref.scale); + let angle_builder = sref_struct_builder.field_builder::(3).unwrap(); + angle_builder.append_value(sref.angle_rad); + let xy_builder = sref_struct_builder.field_builder::(4).unwrap(); + xy_builder.append_value(sref.xy); + + sref_struct_builder.append(true); +} + + +fn append_aref( + aref_builder: &mut DListBuilder, + aref: ARefRecord, + ) { + let aref_struct_builder = aref_builder.values().as_any_mut().downcast_mut::().unwrap(); + + let target_builder = aref_struct_builder.field_builder::(0).unwrap(); + target_builder.append_value(aref.target_id); + let invert_builder = aref_struct_builder.field_builder::(1).unwrap(); + invert_builder.append_value(aref.invert_y); + let scale_builder = aref_struct_builder.field_builder::(2).unwrap(); + scale_builder.append_value(aref.scale); + let angle_builder = aref_struct_builder.field_builder::(3).unwrap(); + angle_builder.append_value(aref.angle_rad); + let xy_builder = aref_struct_builder.field_builder::(4).unwrap(); + xy_builder.append_value(aref.xy); + let xy0_builder = aref_struct_builder.field_builder::(5).unwrap(); + xy0_builder.append_value(aref.xy0); + let xy1_builder = aref_struct_builder.field_builder::(6).unwrap(); + xy1_builder.append_value(aref.xy1); + let counts_builder = aref_struct_builder.field_builder::(7).unwrap(); + counts_builder.append_value(aref.counts); + + aref_struct_builder.append(true); +} + + +fn append_properties( + props_builder: &mut DListBuilder, + properties: Vec, + ) { + for prop in properties { + let prop_builder = props_builder.values().as_any_mut().downcast_mut::().unwrap(); + let key_builder = prop_builder.field_builder::(0).unwrap(); + key_builder.append_value(prop.key); + + let val_builder = prop_builder.field_builder::(1).unwrap(); + val_builder.append_value(prop.value); + + prop_builder.append(true); + } + props_builder.append(true); +} + + +fn append_sref_prop( + sref_props_builder: &mut DListBuilder, + sref: SRefRecord, + ) { + let sref_prop_builder = sref_props_builder.values().as_any_mut().downcast_mut::().unwrap(); + + let target_builder = sref_prop_builder.field_builder::(0).unwrap(); + target_builder.append_value(sref.target_id); + let invert_builder = sref_prop_builder.field_builder::(1).unwrap(); + invert_builder.append_value(sref.invert_y); + let scale_builder = sref_prop_builder.field_builder::(2).unwrap(); + scale_builder.append_value(sref.scale); + let angle_builder = sref_prop_builder.field_builder::(3).unwrap(); + angle_builder.append_value(sref.angle_rad); + let xy_builder = sref_prop_builder.field_builder::(4).unwrap(); + xy_builder.append_value(sref.xy); + + let props_builder = sref_prop_builder.field_builder::(5).unwrap(); + append_properties(props_builder, sref.properties); + + sref_prop_builder.append(true); +} + + +fn append_aref_prop( + aref_props_builder: &mut DListBuilder, + aref: ARefRecord, + ) { + let aref_prop_builder = aref_props_builder.values().as_any_mut().downcast_mut::().unwrap(); + + let target_builder = aref_prop_builder.field_builder::(0).unwrap(); + target_builder.append_value(aref.target_id); + let invert_builder = aref_prop_builder.field_builder::(1).unwrap(); + invert_builder.append_value(aref.invert_y); + let scale_builder = aref_prop_builder.field_builder::(2).unwrap(); + scale_builder.append_value(aref.scale); + let angle_builder = aref_prop_builder.field_builder::(3).unwrap(); + angle_builder.append_value(aref.angle_rad); + let xy_builder = aref_prop_builder.field_builder::(4).unwrap(); + xy_builder.append_value(aref.xy); + let xy0_builder = aref_prop_builder.field_builder::(5).unwrap(); + xy0_builder.append_value(aref.xy0); + let xy1_builder = aref_prop_builder.field_builder::(6).unwrap(); + xy1_builder.append_value(aref.xy1); + let counts_builder = aref_prop_builder.field_builder::(7).unwrap(); + counts_builder.append_value(aref.counts); + + let props_builder = aref_prop_builder.field_builder::(8).unwrap(); + append_properties(props_builder, aref.properties); + + aref_prop_builder.append(true); +} + + +fn aref_to_sref(aref: ARefRecord) -> SRefRecord { + SRefRecord { + target_id: aref.target_id, + invert_y: aref.invert_y, + scale: aref.scale, + angle_rad: aref.angle_rad, + xy: aref.xy, + properties: aref.properties, + } +} + + +fn read_boundary<'a>( + input: &'a [u8], + layers: &mut HashMap, + ) -> IResult<'a, BoundaryRecord> { let (input, _) = records::BOUNDARY::read(input)?; let (input, layer) = LAYER::skip_and_read(input)?; let (input, dtype) = DATATYPE::read(input)?; - insert_layer(layer, dtype, layers, boundary_builder, 0); + let layer_id = layer_id_for(layer, dtype, layers); - let xys_builder = boundary_builder.field_builder::(1).unwrap(); - let xy_builder = xys_builder.values().as_any_mut().downcast_mut::().unwrap(); let (input, xy_iter) = XY::read(input)?; - for xy in xy_iter { - xy_builder.append_value(xy); + let mut vertices: Vec = xy_iter.collect(); + vertices.truncate(vertices.len().saturating_sub(2)); + + let (input, properties) = read_properties_vec(input)?; + + Ok((input, BoundaryRecord { layer_id, vertices, properties })) +} + + +fn append_boundary_batch( + boundary_batches_builder: &mut DListBuilder, + boundary_batch: BoundaryBatchRecord, + ) { + let boundary_batch_builder = boundary_batches_builder.values().as_any_mut().downcast_mut::().unwrap(); + + let layer_builder = boundary_batch_builder.field_builder::(0).unwrap(); + layer_builder.append_value(boundary_batch.layer_id); + + let vertices_builder = boundary_batch_builder.field_builder::(1).unwrap(); + let vertex_builder = vertices_builder.values().as_any_mut().downcast_mut::().unwrap(); + vertex_builder.append_slice(&boundary_batch.vertices); + vertices_builder.append(true); + + let offsets_builder = boundary_batch_builder.field_builder::(2).unwrap(); + let offset_builder = offsets_builder.values().as_any_mut().downcast_mut::().unwrap(); + offset_builder.append_slice(&boundary_batch.vertex_offsets); + offsets_builder.append(true); + + boundary_batch_builder.append(true); +} + + +fn append_rect_batch( + rect_batches_builder: &mut DListBuilder, + rect_batch: RectBatchRecord, + ) { + let rect_batch_builder = rect_batches_builder.values().as_any_mut().downcast_mut::().unwrap(); + + let layer_builder = rect_batch_builder.field_builder::(0).unwrap(); + layer_builder.append_value(rect_batch.layer_id); + + let rects_builder = rect_batch_builder.field_builder::(1).unwrap(); + let rect_builder = rects_builder.values().as_any_mut().downcast_mut::().unwrap(); + rect_builder.append_slice(&rect_batch.rects); + rects_builder.append(true); + + rect_batch_builder.append(true); +} + + +fn append_boundary_prop( + boundary_props_builder: &mut DListBuilder, + boundary: BoundaryRecord, + ) { + let boundary_prop_builder = boundary_props_builder.values().as_any_mut().downcast_mut::().unwrap(); + + let layer_builder = boundary_prop_builder.field_builder::(0).unwrap(); + layer_builder.append_value(boundary.layer_id); + + let vertices_builder = boundary_prop_builder.field_builder::(1).unwrap(); + let vertex_builder = vertices_builder.values().as_any_mut().downcast_mut::().unwrap(); + vertex_builder.append_slice(&boundary.vertices); + vertices_builder.append(true); + + let props_builder = boundary_prop_builder.field_builder::(2).unwrap(); + for prop in boundary.properties { + let prop_builder = props_builder.values().as_any_mut().downcast_mut::().unwrap(); + let key_builder = prop_builder.field_builder::(0).unwrap(); + key_builder.append_value(prop.key); + + let val_builder = prop_builder.field_builder::(1).unwrap(); + val_builder.append_value(prop.value); + + prop_builder.append(true); } - xys_builder.append(true); + props_builder.append(true); - let props_builder = boundary_builder.field_builder::(2).unwrap(); - let (input, ()) = read_properties(input, props_builder)?; - - boundary_builder.append(true); - Ok((input, ())) + boundary_prop_builder.append(true); } @@ -123,7 +509,7 @@ pub fn read_path<'a>( cell_builder: &mut StructBuilder, layers: &mut HashMap, ) -> IResult<'a, ()> { - let paths_builder = cell_builder.field_builder::(4).unwrap(); + let paths_builder = cell_builder.field_builder::(9).unwrap(); let path_builder = paths_builder.values().as_any_mut().downcast_mut::().unwrap(); let (input, _) = records::PATH::read(input)?; @@ -196,8 +582,8 @@ pub fn read_boxnode<'a>( layers: &mut HashMap, ) -> IResult<'a, ()> { let field_num = match tag { - records::RTAG_NODE => 5, - records::RTAG_BOX => 6, + records::RTAG_NODE => 10, + records::RTAG_BOX => 11, _ => return fail(input, format!("Unexpected tag {:04x}", tag)), }; @@ -238,7 +624,7 @@ pub fn read_text<'a>( cell_builder: &mut StructBuilder, layers: &mut HashMap, ) -> IResult<'a, ()> { - let texts_builder = cell_builder.field_builder::(7).unwrap(); + let texts_builder = cell_builder.field_builder::(12).unwrap(); let text_builder = texts_builder.values().as_any_mut().downcast_mut::().unwrap(); let mut path_type = None; @@ -333,13 +719,53 @@ pub fn read_text<'a>( } +fn boundary_to_rect(vertices: &[i32]) -> Option<[i32; 4]> { + if vertices.len() != 8 { + return None; + } -pub fn read_ref<'a>( + let xs = [vertices[0], vertices[2], vertices[4], vertices[6]]; + let ys = [vertices[1], vertices[3], vertices[5], vertices[7]]; + + let min_x = *xs.iter().min().unwrap(); + let max_x = *xs.iter().max().unwrap(); + let min_y = *ys.iter().min().unwrap(); + let max_y = *ys.iter().max().unwrap(); + if min_x >= max_x || min_y >= max_y { + return None; + } + + let mut seen_corners: u8 = 0; + for ii in 0..4 { + let jj = (ii + 1) % 4; + let dx = xs[jj] - xs[ii]; + let dy = ys[jj] - ys[ii]; + if (dx == 0) == (dy == 0) { + return None; + } + + let x = xs[ii]; + let y = ys[ii]; + if (x != min_x && x != max_x) || (y != min_y && y != max_y) { + return None; + } + let code = ((x == max_x) as u8) << 1 | ((y == max_y) as u8); + seen_corners |= 1 << code; + } + + if seen_corners != 0b1111 { + return None; + } + Some([min_x, min_y, max_x, max_y]) +} + + + +fn read_ref<'a>( input: &'a [u8], - cell_builder: &mut StructBuilder, tag: u16, names: &mut HashMap, - ) -> IResult<'a, ()> { + ) -> IResult<'a, RefRecord> { let (input, _) = match tag { records::RTAG_SREF => records::SREF::read(input)?, records::RTAG_AREF => records::AREF::read(input)?, @@ -347,20 +773,15 @@ pub fn read_ref<'a>( }; let is_aref = tag == records::RTAG_AREF; - let refs_builder = cell_builder.field_builder::(2).unwrap(); - let ref_builder = refs_builder.values().as_any_mut().downcast_mut::().unwrap(); - - let mut invert_y = None; - let mut mag = None; - let mut angle_deg = None; + let mut invert_y = false; + let mut scale = 1.0; + let mut angle_rad = 0.0; let mut colrow = None; let (input, struct_name_bytes) = SNAME::skip_and_read(input)?; let struct_name = String::from_utf8(struct_name_bytes).unwrap(); let next_id = names.len(); let id = names.entry(struct_name).or_insert(next_id.try_into().unwrap()); - let target_builder = ref_builder.field_builder::(0).unwrap(); - target_builder.append_value(*id); let (mut input, mut header) = RecordHeader::read(input)?; while header.tag != records::RTAG_XY { @@ -368,17 +789,17 @@ pub fn read_ref<'a>( records::RTAG_STRANS => { let strans; (input, strans) = STRANS::read_data(input, header.data_size)?; - invert_y = Some(strans[0]); + invert_y = strans[0]; }, records::RTAG_MAG => { let _mag; (input, _mag) = MAG::read_data(input, header.data_size)?; - mag = Some(_mag); + scale = _mag; }, records::RTAG_ANGLE => { let _angle_deg; (input, _angle_deg) = ANGLE::read_data(input, header.data_size)?; - angle_deg = Some(_angle_deg); + angle_rad = _angle_deg.to_radians(); }, records::RTAG_COLROW => { let mut _colrow; @@ -393,23 +814,13 @@ pub fn read_ref<'a>( }; (input, header) = RecordHeader::read(input)?; } - let inv_builder = ref_builder.field_builder::(1).unwrap(); - inv_builder.append_option(invert_y); - let mag_builder = ref_builder.field_builder::(2).unwrap(); - mag_builder.append_option(mag); - let angle_builder = ref_builder.field_builder::(3).unwrap(); - angle_builder.append_option(angle_deg); - let (input, mut xy_iter) = XY::read_data(input, header.data_size)?; let xx = xy_iter.next().unwrap(); let yy = xy_iter.next().unwrap(); let xy = ((xx as u32 as u64) << 32) | (yy as u32 as u64); - let xy_builder = ref_builder.field_builder::(4).unwrap(); - xy_builder.append_value(xy); + let (input, properties) = read_properties_vec(input)?; - let rep_builder = ref_builder.field_builder::(5).unwrap(); -// println!("ref, {is_aref:?}"); if is_aref { if colrow.is_none() { return fail(input, "AREF without COLROW before XY".to_string()) @@ -423,30 +834,28 @@ pub fn read_ref<'a>( let xy0a = ((x0a as u32 as u64) << 32) | (y0a as u32 as u64); let xy1a = ((x1a as u32 as u64) << 32) | (y1a as u32 as u64); - let xy0_builder = rep_builder.field_builder::(0).unwrap(); - xy0_builder.append_value(xy0a); - let xy1_builder = rep_builder.field_builder::(1).unwrap(); - xy1_builder.append_value(xy1a); - let counts = ((count0 as u16 as u32) << 16) | (count1 as u16 as u32); - let counts_builder = rep_builder.field_builder::(2).unwrap(); - counts_builder.append_value(counts); - + Ok((input, RefRecord::ARef(ARefRecord { + target_id: *id, + invert_y, + scale, + angle_rad, + xy, + xy0: xy0a, + xy1: xy1a, + counts, + properties, + }))) } else { - let xy0_builder = rep_builder.field_builder::(0).unwrap(); - xy0_builder.append_null(); - let xy1_builder = rep_builder.field_builder::(1).unwrap(); - xy1_builder.append_null(); - let counts_builder = rep_builder.field_builder::(2).unwrap(); - counts_builder.append_null(); + Ok((input, RefRecord::SRef(SRefRecord { + target_id: *id, + invert_y, + scale, + angle_rad, + xy, + properties, + }))) } - rep_builder.append(is_aref); - - let props_builder = ref_builder.field_builder::(6).unwrap(); - let (input, ()) = read_properties(input, props_builder)?; - - ref_builder.append(true); - Ok((input, ())) } @@ -490,6 +899,29 @@ pub fn read_properties<'a>(input: &'a [u8], props_builder: &mut DListBuilder) -> Ok((input, ())) } + +fn read_properties_vec<'a>(input: &'a [u8]) -> IResult<'a, Vec> { + let mut properties = Vec::::new(); + + let (mut input, mut header) = RecordHeader::read(input)?; + while header.tag != ENDEL::tag() { + if header.tag == PROPATTR::tag() { + let key; + let value_bytes; + (input, key) = PROPATTR::read_data(input, header.data_size)?; + (input, value_bytes) = PROPVALUE::read(input)?; + + properties.push(PropertyRecord { + key, + value: String::from_utf8(value_bytes).unwrap(), + }); + } + (input, header) = RecordHeader::read(input)?; + } + + Ok((input, properties)) +} + /* /// diff --git a/src/library.rs b/src/library.rs index abe4ca4..15ae3c8 100644 --- a/src/library.rs +++ b/src/library.rs @@ -111,19 +111,43 @@ pub fn read_library(input: &[u8]) -> IResult { )); - let repetition_struct_t = DataType::Struct(Fields::from(vec![ + let sref_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("target", DataType::UInt32, false), + Field::new("invert_y", DataType::Boolean, false), + Field::new("scale", DataType::Float64, false), + Field::new("angle_rad", DataType::Float64, false), + Field::new("xy", DataType::UInt64, false), + ])); + + let aref_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("target", DataType::UInt32, false), + Field::new("invert_y", DataType::Boolean, false), + Field::new("scale", DataType::Float64, false), + Field::new("angle_rad", DataType::Float64, false), + Field::new("xy", DataType::UInt64, false), Field::new("xy0", DataType::UInt64, false), Field::new("xy1", DataType::UInt64, false), Field::new("counts", DataType::UInt32, false), ])); - let ref_struct_t = DataType::Struct(Fields::from(vec![ + let sref_prop_struct_t = DataType::Struct(Fields::from(vec![ Field::new("target", DataType::UInt32, false), - Field::new("invert_y", DataType::Boolean, true), - Field::new("mag", DataType::Float64, true), - Field::new("angle_deg", DataType::Float64, true), + Field::new("invert_y", DataType::Boolean, false), + Field::new("scale", DataType::Float64, false), + Field::new("angle_rad", DataType::Float64, false), Field::new("xy", DataType::UInt64, false), - Field::new("repetition", repetition_struct_t, true), + Field::new("properties", property_list_t.clone(), true), + ])); + + let aref_prop_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("target", DataType::UInt32, false), + Field::new("invert_y", DataType::Boolean, false), + Field::new("scale", DataType::Float64, false), + Field::new("angle_rad", DataType::Float64, false), + Field::new("xy", DataType::UInt64, false), + Field::new("xy0", DataType::UInt64, false), + Field::new("xy1", DataType::UInt64, false), + Field::new("counts", DataType::UInt32, false), Field::new("properties", property_list_t.clone(), true), ])); @@ -147,9 +171,24 @@ pub fn read_library(input: &[u8]) -> IResult { Field::new_list_field(DataType::Int32, false) )); - let boundary_struct_t = DataType::Struct(Fields::from(vec![ + let boundary_batch_struct_t = DataType::Struct(Fields::from(vec![ Field::new("layer", DataType::UInt32, false), - Field::new("xy", coords_t.clone(), false), + Field::new("vertices", coords_t.clone(), false), + Field::new( + "vertex_offsets", + DataType::List(Arc::new(Field::new_list_field(DataType::UInt32, false))), + false, + ), + ])); + + let rect_batch_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("layer", DataType::UInt32, false), + Field::new("rects", coords_t.clone(), false), + ])); + + let boundary_prop_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("layer", DataType::UInt32, false), + Field::new("vertices", coords_t.clone(), false), Field::new("properties", property_list_t.clone(), true), ])); @@ -169,17 +208,36 @@ pub fn read_library(input: &[u8]) -> IResult { Field::new("properties", property_list_t.clone(), true), ])); + let sref_list_t = DataType::List(Arc::new( + Field::new_list_field(sref_struct_t, false) + )); - let ref_list_t = DataType::List(Arc::new( - Field::new_list_field(ref_struct_t, false) + let aref_list_t = DataType::List(Arc::new( + Field::new_list_field(aref_struct_t, false) + )); + + let sref_prop_list_t = DataType::List(Arc::new( + Field::new_list_field(sref_prop_struct_t, false) + )); + + let aref_prop_list_t = DataType::List(Arc::new( + Field::new_list_field(aref_prop_struct_t, false) )); let text_list_t = DataType::List(Arc::new( Field::new_list_field(text_struct_t, false) )); - let boundary_list_t = DataType::List(Arc::new( - Field::new_list_field(boundary_struct_t, false) + let boundary_batch_list_t = DataType::List(Arc::new( + Field::new_list_field(boundary_batch_struct_t, false) + )); + + let rect_batch_list_t = DataType::List(Arc::new( + Field::new_list_field(rect_batch_struct_t, false) + )); + + let boundary_prop_list_t = DataType::List(Arc::new( + Field::new_list_field(boundary_prop_struct_t, false) )); let path_list_t = DataType::List(Arc::new( @@ -207,8 +265,13 @@ pub fn read_library(input: &[u8]) -> IResult { let cell_struct_t = DataType::Struct(Fields::from(vec![ Field::new("id", DataType::UInt32, false), Field::new("file_offset", DataType::UInt64, false), - Field::new("refs", ref_list_t, false), - Field::new("boundaries", boundary_list_t, false), + Field::new("srefs", sref_list_t, false), + Field::new("arefs", aref_list_t, false), + Field::new("sref_props", sref_prop_list_t, false), + Field::new("aref_props", aref_prop_list_t, false), + Field::new("rect_batches", rect_batch_list_t, false), + Field::new("boundary_batches", boundary_batch_list_t, false), + Field::new("boundary_props", boundary_prop_list_t, false), Field::new("paths", path_list_t, false), Field::new("nodes", boxnode_list_t.clone(), true), Field::new("boxes", boxnode_list_t.clone(), true), From d3bf15f47ae5267fe56ace576cd2c4d7eaef0b0b Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Thu, 2 Apr 2026 20:35:31 -0700 Subject: [PATCH 2/2] follow masque interface --- Cargo.toml | 2 +- src/basic.rs | 20 ++++++++++---------- src/elements.rs | 14 +++++++------- src/lib.rs | 32 ++++++++++++++++---------------- src/library.rs | 4 ++-- src/record.rs | 36 ++++++++++++++++++------------------ 6 files changed, 54 insertions(+), 54 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 87243ac..e201109 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "klamath_rs_ext" version = "0.2.0" authors = ["jan "] -edition = "2021" +edition = "2024" [lib] diff --git a/src/basic.rs b/src/basic.rs index 8bad4e7..051d0ba 100644 --- a/src/basic.rs +++ b/src/basic.rs @@ -15,15 +15,15 @@ pub enum ErrType { Failed(String), } -pub fn fail(input: &[u8], msg: String) -> IResult { +pub fn fail(input: &[u8], msg: String) -> IResult<'_, O> { Err((input, ErrType::Failed(msg))) } -pub fn incomplete(input: &[u8], size: Option) -> IResult { +pub fn incomplete(input: &[u8], size: Option) -> IResult<'_, O> { Err((input, ErrType::Incomplete(size))) } -pub fn take_bytes>(input: &[u8], count: CC) -> IResult<&[u8]> { +pub fn take_bytes>(input: &[u8], count: CC) -> IResult<'_, &[u8]> { let cc = count.into(); if input.len() < cc { incomplete(input, Some(cc)) @@ -37,19 +37,19 @@ pub fn take_bytes>(input: &[u8], count: CC) -> IResult<&[u8]> { /* * Parse functions */ -pub fn parse_u16(input: &[u8]) -> IResult { +pub fn parse_u16(input: &[u8]) -> IResult<'_, u16> { let (input, buf) = take_bytes(input, 2_usize)?; let val = BigEndian::read_u16(buf); Ok((input, val)) } -pub fn parse_int2(input: &[u8]) -> IResult { +pub fn parse_int2(input: &[u8]) -> IResult<'_, i16> { let (input, buf) = take_bytes(input, 2_usize)?; let val = BigEndian::read_i16(buf); Ok((input, val)) } -pub fn parse_int4(input: &[u8]) -> IResult { +pub fn parse_int4(input: &[u8]) -> IResult<'_, i32> { let (input, buf) = take_bytes(input, 4_usize)?; let val = BigEndian::read_i32(buf); Ok((input, val)) @@ -67,14 +67,14 @@ pub fn decode_real8(int: u64) -> f64 { mant * 2_f64.powi(exp2) } -pub fn parse_real8(input: &[u8]) -> IResult { +pub fn parse_real8(input: &[u8]) -> IResult<'_, f64> { let (input, buf) = take_bytes(input, 8_usize)?; let data = BigEndian::read_u64(buf); Ok((input, decode_real8(data))) } -pub fn parse_datetime(input: &[u8]) -> IResult<[i16; 6]> { +pub fn parse_datetime(input: &[u8]) -> IResult<'_, [i16; 6]> { let mut buf = [0_i16; 6]; let mut input = input; for bb in &mut buf { @@ -85,7 +85,7 @@ pub fn parse_datetime(input: &[u8]) -> IResult<[i16; 6]> { } -pub fn parse_bitarray(input: &[u8]) -> IResult<[bool; 16]> { +pub fn parse_bitarray(input: &[u8]) -> IResult<'_, [bool; 16]> { let mut bits = [false; 16]; let (input, val) = parse_int2(input)?; for (ii, bit) in bits.iter_mut().enumerate() { @@ -95,7 +95,7 @@ pub fn parse_bitarray(input: &[u8]) -> IResult<[bool; 16]> { } -pub fn parse_ascii(input: &[u8], length: u16) -> IResult> { +pub fn parse_ascii(input: &[u8], length: u16) -> IResult<'_, Vec> { let length = length as usize; let (input, data) = take_bytes(input, length)?; let last = data[length - 1]; diff --git a/src/elements.rs b/src/elements.rs index f44059c..e42e3f8 100644 --- a/src/elements.rs +++ b/src/elements.rs @@ -946,7 +946,7 @@ pub trait Element { /// Read from a stream to construct this object. /// Consumes up to (and including) the ENDEL record. /// - fn read(input: &[u8]) -> IResult where Self: Sized; + fn read(input: &[u8]) -> IResult<'_, Self> where Self: Sized; /// /// Write this element to a stream. @@ -993,7 +993,7 @@ pub struct Reference { } impl Element for Reference { - fn read(input: &[u8]) -> IResult { + fn read(input: &[u8]) -> IResult<'_, Self> { let mut invert_y = false; let mut mag = 1.0; let mut angle_deg = 0.0; @@ -1094,7 +1094,7 @@ pub struct Boundary { } impl Element for Boundary { - fn read(input: &[u8]) -> IResult { + fn read(input: &[u8]) -> IResult<'_, Self> { let (input, layer) = LAYER::skip_and_read(input)?; let (input, dtype) = DATATYPE::read(input)?; let (input, xy) = XY::read(input)?; @@ -1142,7 +1142,7 @@ pub struct Path { } impl Element for Path { - fn read(input: &[u8]) -> IResult { + fn read(input: &[u8]) -> IResult<'_, Self> { let mut path_type = 0; let mut width = 0; let mut bgn_ext = 0; @@ -1221,7 +1221,7 @@ pub struct GDSBox { } impl Element for GDSBox { - fn read(input: &[u8]) -> IResult { + fn read(input: &[u8]) -> IResult<'_, Self> { let (input, layer) = LAYER::skip_and_read(input)?; let (input, dtype) = BOXTYPE::read(input)?; let (input, xy) = XY::read(input)?; @@ -1260,7 +1260,7 @@ pub struct Node { } impl Element for Node { - fn read(input: &[u8]) -> IResult { + fn read(input: &[u8]) -> IResult<'_, Self> { let (input, layer) = LAYER::skip_and_read(input)?; let (input, dtype) = NODETYPE::read(input)?; let (input, xy) = XY::read(input)?; @@ -1317,7 +1317,7 @@ pub struct Text { } impl Element for Text { - fn read(input: &[u8]) -> IResult { + fn read(input: &[u8]) -> IResult<'_, Self> { let mut path_type = 0; let mut presentation = [false; 16]; let mut invert_y = false; diff --git a/src/lib.rs b/src/lib.rs index 78e359b..39081ab 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,7 +20,7 @@ use arrow::ffi::{to_ffi, FFI_ArrowArray, FFI_ArrowSchema}; use arrow::array::Array; -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn read_path( cpath: *const c_char, arr: *mut FFI_ArrowArray, @@ -40,7 +40,7 @@ pub unsafe extern "C" fn read_path( let input = fs::read(path).expect("File read failed"); let (_input, struct_arr) = read_library(&input).expect("Read failed"); - (*arr, *schema) = to_ffi(&struct_arr.to_data()).unwrap(); + unsafe { (*arr, *schema) = to_ffi(&struct_arr.to_data()).unwrap(); } } @@ -80,40 +80,40 @@ macro_rules! impl_i32be { } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn f64_to_i16(arr: *mut f64, size: usize) -> f64 { impl_i16be!(f64, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn f64_to_i32(arr: *mut f64, size: usize) -> f64 { impl_i32be!(f64, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn f32_to_i16(arr: *mut f32, size: usize) -> f32 { impl_i16be!(f32, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn f32_to_i32(arr: *mut f32, size: usize) -> f32 { impl_i32be!(f32, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn u64_to_i16(arr: *mut u64, size: usize) -> u64 { impl_i16be!(u64, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn u64_to_i32(arr: *mut u64, size: usize) -> u64 { impl_i32be!(u64, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn i64_to_i16(arr: *mut i64, size: usize) -> i64 { impl_i16be!(i64, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn i64_to_i32(arr: *mut i64, size: usize) -> i64 { impl_i32be!(i64, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn u32_to_i16(arr: *mut u32, size: usize) -> u32 { impl_i16be!(u32, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn u32_to_i32(arr: *mut u32, size: usize) -> u32 { impl_i32be!(u32, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn i32_to_i16(arr: *mut i32, size: usize) -> i32 { impl_i16be!(i32, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn i32_to_i32(arr: *mut i32, size: usize) -> i32 { impl_i32be!(i32, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn u16_to_i16(arr: *mut u16, size: usize) -> u16 { impl_i16be!(u16, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn i16_to_i16(arr: *mut i16, size: usize) -> i16 { impl_i16be!(i16, arr, size) } diff --git a/src/library.rs b/src/library.rs index 15ae3c8..bf4c890 100644 --- a/src/library.rs +++ b/src/library.rs @@ -64,7 +64,7 @@ impl FileHeader { /// Returns: /// FileHeader object /// - pub fn read(input: &[u8]) -> IResult { + pub fn read(input: &[u8]) -> IResult<'_, Self> { let (input, _version) = records::HEADER::read(input)?; let (input, [mod_time, acc_time]) = records::BGNLIB::read(input)?; let (input, name) = records::LIBNAME::skip_and_read(input)?; @@ -98,7 +98,7 @@ impl FileHeader { } -pub fn read_library(input: &[u8]) -> IResult { +pub fn read_library(input: &[u8]) -> IResult<'_, StructArray> { let input_size = input.len(); let property_t = DataType::Struct(Fields::from(vec![ diff --git a/src/record.rs b/src/record.rs index 3226dbb..8bc482c 100644 --- a/src/record.rs +++ b/src/record.rs @@ -24,7 +24,7 @@ pub struct RecordHeader { } impl RecordHeader { - pub fn read(input: &[u8]) -> IResult { + pub fn read(input: &[u8]) -> IResult<'_, RecordHeader> { let (input, size) = parse_u16(input)?; let (input, tag) = parse_u16(input)?; Ok((input, RecordHeader{tag, data_size:size - 4})) @@ -50,7 +50,7 @@ pub trait RecordData { type InData : ?Sized; type ByteData : AsRef<[u8]>; - fn read(input: &[u8], size: u16) -> IResult>; + fn read(input: &[u8], size: u16) -> IResult<'_, Self::BareData<'_>>; fn pack_into(buf: &mut [u8], data: &Self::InData); //fn size(data: &Self::BareData<'_>) -> u16; fn pack(data: &Self::InData) -> Self::ByteData; @@ -72,7 +72,7 @@ pub trait Record { } } - fn read_header(input: &[u8]) -> IResult { + fn read_header(input: &[u8]) -> IResult<'_, RecordHeader> { RecordHeader::read(input) } @@ -80,7 +80,7 @@ pub trait Record { RecordHeader{tag: Self::tag(), data_size}.write(ww) } - fn read_data(input: &[u8], size: u16) -> IResult> { + fn read_data(input: &[u8], size: u16) -> IResult<'_, RData::BareData<'_>> { RData::read(input, size) } @@ -95,7 +95,7 @@ pub trait Record { /// True if the record was encountered and skipped. /// False if the end of the library was reached. /// - fn skip_past(input: &[u8]) -> IResult { + fn skip_past(input: &[u8]) -> IResult<'_, bool> { let original_input = input; let (mut input, mut header) = RecordHeader::read(input)?; while header.tag != Self::tag() { @@ -109,7 +109,7 @@ pub trait Record { Ok((input, true)) } - fn skip_and_read(input: &[u8]) -> IResult> { + fn skip_and_read(input: &[u8]) -> IResult<'_, RData::BareData<'_>> { let (mut input, mut header) = RecordHeader::read(input)?; while header.tag != Self::tag() { (input, _) = take_bytes(input, header.data_size)?; @@ -119,7 +119,7 @@ pub trait Record { Ok((input, data)) } - fn expect_header(input: &[u8]) -> IResult { + fn expect_header(input: &[u8]) -> IResult<'_, u16> { let (input, header) = RecordHeader::read(input)?; if header.tag != Self::tag() { fail(input, format!("Unexpected record! Got tag 0x{:04x}, expected 0x{:04x}", header.tag, Self::tag())) @@ -128,7 +128,7 @@ pub trait Record { } } - fn read(input: &[u8]) -> IResult> { + fn read(input: &[u8]) -> IResult<'_, RData::BareData<'_>> { let (input, size) = Self::expect_header(input)?; Self::check_size(size).unwrap(); let (input, data) = Self::read_data(input, size)?; @@ -152,7 +152,7 @@ impl RecordData for BitArray { type InData = [bool; 16]; type ByteData = [u8; 2]; - fn read(input: &[u8], size: u16) -> IResult> { + fn read(input: &[u8], size: u16) -> IResult<'_, Self::BareData<'_>> { assert!(size == 2); parse_bitarray(input) } @@ -175,7 +175,7 @@ impl RecordData for Int2 { type InData = i16; type ByteData = [u8; 2]; - fn read(input: &[u8], size: u16) -> IResult> { + fn read(input: &[u8], size: u16) -> IResult<'_, Self::BareData<'_>> { assert!(size == 2); parse_int2(input) } @@ -197,7 +197,7 @@ impl RecordData for Int4 { type InData = i32; type ByteData = [u8; 4]; - fn read(input: &[u8], size: u16) -> IResult> { + fn read(input: &[u8], size: u16) -> IResult<'_, Self::BareData<'_>> { assert!(size == 4); parse_int4(input) } @@ -220,7 +220,7 @@ impl RecordData for Int2Array { type InData = [i16]; type ByteData = Vec; - fn read(input: &[u8], size: u16) -> IResult> { + fn read(input: &[u8], size: u16) -> IResult<'_, Self::BareData<'_>> { assert!(size % 2 == 0, "Record must contain an integer quantity of integers"); //let mut input = input; let (input, bytes) = take_bytes(input, size)?; @@ -261,7 +261,7 @@ impl RecordData for Int4Array { type InData = [i32]; type ByteData = Vec; - fn read(input: &[u8], size: u16) -> IResult> { + fn read(input: &[u8], size: u16) -> IResult<'_, Self::BareData<'_>> { assert!(size % 4 == 0, "Record must contain an integer quantity of integers"); //let mut input = input; let (input, bytes) = take_bytes(input, size)?; @@ -302,7 +302,7 @@ impl RecordData for Real8 { type InData = f64; type ByteData = [u8; 8]; - fn read(input: &[u8], size: u16) -> IResult> { + fn read(input: &[u8], size: u16) -> IResult<'_, Self::BareData<'_>> { assert!(size == 8); parse_real8(input) } @@ -324,7 +324,7 @@ impl RecordData for Real8Pair { type InData = (f64, f64); type ByteData = [u8; 2 * 8]; - fn read(input: &[u8], size: u16) -> IResult> { + fn read(input: &[u8], size: u16) -> IResult<'_, Self::BareData<'_>> { assert!(size == 2 * 8); let (input, data0) = parse_real8(input)?; let (input, data1) = parse_real8(input)?; @@ -354,7 +354,7 @@ impl RecordData for ASCII { type InData = [u8]; type ByteData = Vec; - fn read(input: &[u8], size: u16) -> IResult> { + fn read(input: &[u8], size: u16) -> IResult<'_, Self::BareData<'_>> { parse_ascii(input, size) } @@ -376,7 +376,7 @@ impl RecordData for DateTimePair { type InData = [[i16; 6]; 2]; type ByteData = [u8; 2 * 6 * 2]; - fn read(input: &[u8], size: u16) -> IResult> { + fn read(input: &[u8], size: u16) -> IResult<'_, Self::BareData<'_>> { assert!(size == 2 * 6 * 2); let (input, data0) = parse_datetime(input)?; let (input, data1) = parse_datetime(input)?; @@ -406,7 +406,7 @@ impl RecordData for Empty { type InData = (); type ByteData = [u8; 0]; - fn read(input: &[u8], size: u16) -> IResult> { + fn read(input: &[u8], size: u16) -> IResult<'_, Self::BareData<'_>> { assert!(size == 0); Ok((input, ())) }