Use a global dictionary approach for layers

Maybe there's a way to do this with Arrow dictionaries?
This commit is contained in:
Jan Petykiewicz 2025-04-20 23:21:54 -07:00
parent 9c98ee2b97
commit 259df49a22
2 changed files with 106 additions and 66 deletions

View File

@ -29,18 +29,38 @@ pub type DListBuilder = ListBuilder<Box<dyn ArrayBuilder>>;
pub type FListBuilder = FixedSizeListBuilder<Box<dyn ArrayBuilder>>;
pub fn read_elements<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, names: &mut HashMap<String, u32>) -> IResult<'a, ()> {
/// Interns a `(layer, dtype)` pair in `layers` and appends its id to a struct field.
///
/// The pair is packed into a single `u32` key: the bit pattern of `layer` occupies
/// the upper 16 bits and the bit pattern of `dtype` the lower 16 bits. A key that
/// has not been seen before is assigned the next sequential id (the size of
/// `layers` before insertion); a known key reuses its existing id. The id is then
/// appended to the `UInt32Builder` located at `field_index` of `struct_builder`.
///
/// # Panics
///
/// Panics if the field at `field_index` is not a `UInt32Builder`, or if a newly
/// allocated id would not fit in a `u32`.
fn insert_layer(
    layer: i16,
    dtype: i16,
    layers: &mut HashMap<u32, u32>,
    struct_builder: &mut StructBuilder,
    field_index: usize,
) {
    // Go through u16 first so a negative value contributes only its 16-bit
    // pattern instead of sign-extending into the other half of the key.
    let key = (u32::from(layer as u16) << 16) | u32::from(dtype as u16);

    // Ids are handed out in first-seen order. The conversion is deferred to the
    // insertion path so that looking up an existing key can never panic.
    let next_id = layers.len();
    let id = *layers.entry(key).or_insert_with(|| {
        u32::try_from(next_id).expect("number of distinct layer/dtype pairs exceeds u32 range")
    });

    struct_builder
        .field_builder::<UInt32Builder>(field_index)
        .expect("layer field must be backed by a UInt32Builder")
        .append_value(id);
}
pub fn read_elements<'a>(
input: &'a [u8],
cell_builder: &mut StructBuilder,
names: &mut HashMap<String, u32>,
layers: &mut HashMap<u32, u32>,
) -> IResult<'a, ()> {
let mut input = input;
let (_, mut header) = RecordHeader::read(input)?; // don't consume tag
while header.tag != records::RTAG_ENDSTR {
(input, _) = match header.tag {
records::RTAG_SREF => read_ref(input, cell_builder, header.tag, names)?,
records::RTAG_AREF => read_ref(input, cell_builder, header.tag, names)?,
records::RTAG_BOUNDARY => read_boundary(input, cell_builder)?,
records::RTAG_PATH => read_path(input, cell_builder)?,
records::RTAG_NODE => read_boxnode(input, cell_builder, header.tag)?,
records::RTAG_BOX => read_boxnode(input, cell_builder, header.tag)?,
records::RTAG_TEXT => read_text(input, cell_builder)?,
records::RTAG_BOUNDARY => read_boundary(input, cell_builder, layers)?,
records::RTAG_PATH => read_path(input, cell_builder, layers)?,
records::RTAG_NODE => read_boxnode(input, cell_builder, header.tag, layers)?,
records::RTAG_BOX => read_boxnode(input, cell_builder, header.tag, layers)?,
records::RTAG_TEXT => read_text(input, cell_builder, layers)?,
_ => {
// don't care, skip
let result = take_bytes(input, header.data_size + 4)?;
@ -68,21 +88,21 @@ pub fn read_elements<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, name
}
pub fn read_boundary<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResult<'a, ()> {
pub fn read_boundary<'a>(
input: &'a [u8],
cell_builder: &mut StructBuilder,
layers: &mut HashMap<u32, u32>,
) -> IResult<'a, ()> {
let boundaries_builder = cell_builder.field_builder::<DListBuilder>(3).unwrap();
let boundary_builder = boundaries_builder.values().as_any_mut().downcast_mut::<StructBuilder>().unwrap();
let (input, _) = records::BOUNDARY::read(input)?;
let (input, layer) = LAYER::skip_and_read(input)?;
let layer_builder = boundary_builder.field_builder::<Int16Builder>(0).unwrap();
layer_builder.append_value(layer);
let (input, dtype) = DATATYPE::read(input)?;
let dtype_builder = boundary_builder.field_builder::<Int16Builder>(1).unwrap();
dtype_builder.append_value(dtype);
insert_layer(layer, dtype, layers, boundary_builder, 0);
let xys_builder = boundary_builder.field_builder::<DListBuilder>(2).unwrap();
let xys_builder = boundary_builder.field_builder::<DListBuilder>(1).unwrap();
let xy_builder = xys_builder.values().as_any_mut().downcast_mut::<Int32Builder>().unwrap();
let (input, xy_iter) = XY::read(input)?;
for xy in xy_iter {
@ -90,7 +110,7 @@ pub fn read_boundary<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> I
}
xys_builder.append(true);
let props_builder = boundary_builder.field_builder::<DListBuilder>(3).unwrap();
let props_builder = boundary_builder.field_builder::<DListBuilder>(2).unwrap();
let (input, ()) = read_properties(input, props_builder)?;
boundary_builder.append(true);
@ -98,7 +118,11 @@ pub fn read_boundary<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> I
}
pub fn read_path<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResult<'a, ()> {
pub fn read_path<'a>(
input: &'a [u8],
cell_builder: &mut StructBuilder,
layers: &mut HashMap<u32, u32>,
) -> IResult<'a, ()> {
let paths_builder = cell_builder.field_builder::<DListBuilder>(4).unwrap();
let path_builder = paths_builder.values().as_any_mut().downcast_mut::<StructBuilder>().unwrap();
@ -106,10 +130,12 @@ pub fn read_path<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu
let (input, layer) = LAYER::skip_and_read(input)?;
let (input, dtype) = DATATYPE::read(input)?;
let layer_builder = path_builder.field_builder::<Int16Builder>(0).unwrap();
layer_builder.append_value(layer);
let dtype_builder = path_builder.field_builder::<Int16Builder>(1).unwrap();
dtype_builder.append_value(dtype);
insert_layer(layer, dtype, layers, path_builder, 0);
//let layer32 = (layer as u16 as u32) << 16) | (dtype as u16 as u32)
//let next_id = layers.len();
//let id = layers.entry(layer32).or_insert(next_id.try_into().unwrap());
//let layer_builder = path_builder.field_builder::<UInt32Builder>(0).unwrap();
//layer_builder.append_value((id);
let mut path_type = None;
let mut width = None;
@ -144,16 +170,16 @@ pub fn read_path<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu
};
(input, header) = RecordHeader::read(input)?;
}
let path_type_builder = path_builder.field_builder::<Int16Builder>(2).unwrap();
let path_type_builder = path_builder.field_builder::<Int16Builder>(1).unwrap();
path_type_builder.append_option(path_type);
let ext0_builder = path_builder.field_builder::<Int32Builder>(3).unwrap();
let ext0_builder = path_builder.field_builder::<Int32Builder>(2).unwrap();
ext0_builder.append_option(bgn_ext);
let ext1_builder = path_builder.field_builder::<Int32Builder>(4).unwrap();
let ext1_builder = path_builder.field_builder::<Int32Builder>(3).unwrap();
ext1_builder.append_option(end_ext);
let width_builder = path_builder.field_builder::<Int32Builder>(5).unwrap();
let width_builder = path_builder.field_builder::<Int32Builder>(4).unwrap();
width_builder.append_option(width);
let xys_builder = path_builder.field_builder::<DListBuilder>(6).unwrap();
let xys_builder = path_builder.field_builder::<DListBuilder>(5).unwrap();
let (input, xy_iter) = XY::read_data(input, header.data_size)?;
for xy in xy_iter {
let xy_builder = xys_builder.values().as_any_mut().downcast_mut::<Int32Builder>().unwrap();
@ -161,14 +187,19 @@ pub fn read_path<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu
}
xys_builder.append(true);
let props_builder = path_builder.field_builder::<DListBuilder>(7).unwrap();
let props_builder = path_builder.field_builder::<DListBuilder>(6).unwrap();
let (input, ()) = read_properties(input, props_builder)?;
path_builder.append(true);
Ok((input, ()))
}
pub fn read_boxnode<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag: u16) -> IResult<'a, ()> {
pub fn read_boxnode<'a>(
input: &'a [u8],
cell_builder: &mut StructBuilder,
tag: u16,
layers: &mut HashMap<u32, u32>,
) -> IResult<'a, ()> {
let field_num = match tag {
records::RTAG_NODE => 5,
records::RTAG_BOX => 6,
@ -184,19 +215,15 @@ pub fn read_boxnode<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag:
_ => return fail(input, format!("Unexpected tag {:04x}", tag)),
};
let layer_builder = boxnode_builder.field_builder::<Int16Builder>(0).unwrap();
let (input, layer) = LAYER::skip_and_read(input)?;
layer_builder.append_value(layer);
let (input, dtype) = match tag {
records::RTAG_NODE => NODETYPE::read(input)?,
records::RTAG_BOX => BOXTYPE::read(input)?,
_ => return fail(input, format!("Unexpected tag {:04x}", tag)),
};
let dtype_builder = boxnode_builder.field_builder::<Int16Builder>(1).unwrap();
dtype_builder.append_value(dtype);
insert_layer(layer, dtype, layers, boxnode_builder, 0);
let xys_builder = boxnode_builder.field_builder::<DListBuilder>(2).unwrap();
let xys_builder = boxnode_builder.field_builder::<DListBuilder>(1).unwrap();
let xy_builder = xys_builder.values().as_any_mut().downcast_mut::<Int32Builder>().unwrap();
let (input, xy_iter) = XY::read(input)?;
for xy in xy_iter {
@ -204,14 +231,18 @@ pub fn read_boxnode<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag:
}
xys_builder.append(true);
let props_builder = boxnode_builder.field_builder::<DListBuilder>(3).unwrap();
let props_builder = boxnode_builder.field_builder::<DListBuilder>(2).unwrap();
let (input, ()) = read_properties(input, props_builder)?;
boxnode_builder.append(true);
Ok((input, ()))
}
pub fn read_text<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResult<'a, ()> {
pub fn read_text<'a>(
input: &'a [u8],
cell_builder: &mut StructBuilder,
layers: &mut HashMap<u32, u32>,
) -> IResult<'a, ()> {
let texts_builder = cell_builder.field_builder::<DListBuilder>(7).unwrap();
let text_builder = texts_builder.values().as_any_mut().downcast_mut::<StructBuilder>().unwrap();
@ -225,12 +256,8 @@ pub fn read_text<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu
let mut angle_deg = None;
let (input, layer) = LAYER::skip_and_read(input)?;
let layer_builder = text_builder.field_builder::<Int16Builder>(0).unwrap();
layer_builder.append_value(layer);
let (input, dtype) = TEXTTYPE::read(input)?;
let dtype_builder = text_builder.field_builder::<Int16Builder>(1).unwrap();
dtype_builder.append_value(dtype);
insert_layer(layer, dtype, layers, text_builder, 0);
let (mut input, mut header) = RecordHeader::read(input)?;
while header.tag != records::RTAG_XY {
@ -274,35 +301,35 @@ pub fn read_text<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu
(input, header) = RecordHeader::read(input)?;
}
let pres_hori_builder = text_builder.field_builder::<UInt8Builder>(2).unwrap();
let pres_hori_builder = text_builder.field_builder::<UInt8Builder>(1).unwrap();
pres_hori_builder.append_option(pres_hori);
let pres_vert_builder = text_builder.field_builder::<UInt8Builder>(3).unwrap();
let pres_vert_builder = text_builder.field_builder::<UInt8Builder>(2).unwrap();
pres_vert_builder.append_option(pres_vert);
let pres_font_builder = text_builder.field_builder::<UInt8Builder>(4).unwrap();
let pres_font_builder = text_builder.field_builder::<UInt8Builder>(3).unwrap();
pres_font_builder.append_option(pres_font);
let path_type_builder = text_builder.field_builder::<Int16Builder>(5).unwrap();
let path_type_builder = text_builder.field_builder::<Int16Builder>(4).unwrap();
path_type_builder.append_option(path_type);
let width_builder = text_builder.field_builder::<Int32Builder>(6).unwrap();
let width_builder = text_builder.field_builder::<Int32Builder>(5).unwrap();
width_builder.append_option(width);
let inv_builder = text_builder.field_builder::<BooleanBuilder>(7).unwrap();
let inv_builder = text_builder.field_builder::<BooleanBuilder>(6).unwrap();
inv_builder.append_option(invert_y);
let mag_builder = text_builder.field_builder::<Float64Builder>(8).unwrap();
let mag_builder = text_builder.field_builder::<Float64Builder>(7).unwrap();
mag_builder.append_option(mag);
let angle_builder = text_builder.field_builder::<Float64Builder>(9).unwrap();
let angle_builder = text_builder.field_builder::<Float64Builder>(8).unwrap();
angle_builder.append_option(angle_deg);
let (input, mut xy_iter) = XY::read_data(input, header.data_size)?;
let x_builder = text_builder.field_builder::<Int32Builder>(10).unwrap();
let x_builder = text_builder.field_builder::<Int32Builder>(9).unwrap();
x_builder.append_value(xy_iter.next().unwrap());
let y_builder = text_builder.field_builder::<Int32Builder>(11).unwrap();
let y_builder = text_builder.field_builder::<Int32Builder>(10).unwrap();
y_builder.append_value(xy_iter.next().unwrap());
let (input, string_bytes) = STRING::read(input)?;
let string = String::from_utf8(string_bytes).unwrap();
let string_builder = text_builder.field_builder::<StringBuilder>(12).unwrap();
let string_builder = text_builder.field_builder::<StringBuilder>(11).unwrap();
string_builder.append_value(string);
let props_builder = text_builder.field_builder::<DListBuilder>(13).unwrap();
let props_builder = text_builder.field_builder::<DListBuilder>(12).unwrap();
let (input, ()) = read_properties(input, props_builder)?;
text_builder.append(true);
@ -311,7 +338,12 @@ pub fn read_text<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu
pub fn read_ref<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag: u16, names: &mut HashMap<String, u32>) -> IResult<'a, ()> {
pub fn read_ref<'a>(
input: &'a [u8],
cell_builder: &mut StructBuilder,
tag: u16,
names: &mut HashMap<String, u32>,
) -> IResult<'a, ()> {
let (input, _) = match tag {
records::RTAG_SREF => records::SREF::read(input)?,
records::RTAG_AREF => records::AREF::read(input)?,

View File

@ -120,7 +120,6 @@ pub fn read_library(input: &[u8]) -> IResult<StructArray> {
Field::new("count1", DataType::Int16, false),
]));
let ref_struct_t = DataType::Struct(Fields::from(vec![
Field::new("target", DataType::UInt32, false),
Field::new("invert_y", DataType::Boolean, true),
@ -132,10 +131,8 @@ pub fn read_library(input: &[u8]) -> IResult<StructArray> {
Field::new("properties", property_list_t.clone(), true),
]));
let text_struct_t = DataType::Struct(Fields::from(vec![
Field::new("layer", DataType::Int16, false),
Field::new("dtype", DataType::Int16, false),
Field::new("layer", DataType::UInt32, false),
Field::new("presentation_horiz", DataType::UInt8, true),
Field::new("presentation_vert", DataType::UInt8, true),
Field::new("presentation_font", DataType::UInt8, true),
@ -156,15 +153,13 @@ pub fn read_library(input: &[u8]) -> IResult<StructArray> {
));
let boundary_struct_t = DataType::Struct(Fields::from(vec![
Field::new("layer", DataType::Int16, false),
Field::new("dtype", DataType::Int16, false),
Field::new("layer", DataType::UInt32, false),
Field::new("xy", coords_t.clone(), false),
Field::new("properties", property_list_t.clone(), true),
]));
let path_struct_t = DataType::Struct(Fields::from(vec![
Field::new("layer", DataType::Int16, false),
Field::new("dtype", DataType::Int16, false),
Field::new("layer", DataType::UInt32, false),
Field::new("path_type", DataType::Int16, true),
Field::new("extension_start", DataType::Int32, true),
Field::new("extension_end", DataType::Int32, true),
@ -174,8 +169,7 @@ pub fn read_library(input: &[u8]) -> IResult<StructArray> {
]));
let boxnode_struct_t = DataType::Struct(Fields::from(vec![
Field::new("layer", DataType::Int16, false),
Field::new("dtype", DataType::Int16, false),
Field::new("layer", DataType::UInt32, false),
Field::new("xy", coords_t.clone(), false),
Field::new("properties", property_list_t.clone(), true),
]));
@ -205,6 +199,10 @@ pub fn read_library(input: &[u8]) -> IResult<StructArray> {
Field::new_list_field(DataType::Utf8, false)
));
let layer_list_t = DataType::List(Arc::new(
Field::new_list_field(DataType::UInt32, false)
));
let time_t = DataType::FixedSizeList(Arc::new(
Field::new_list_field(DataType::Int16, false),
),
@ -232,8 +230,9 @@ pub fn read_library(input: &[u8]) -> IResult<StructArray> {
Field::new("lib_name", DataType::Utf8, false),
Field::new("mod_time", time_t.clone(), false),
Field::new("acc_time", time_t.clone(), false),
Field::new("cell_names", name_list_t, false),
Field::new("cells", cells_list_t, false),
Field::new("cell_names", name_list_t, false),
Field::new("layers", layer_list_t, false),
],
0,
);
@ -255,7 +254,8 @@ pub fn read_library(input: &[u8]) -> IResult<StructArray> {
atl_builder.append(true);
let mut names = HashMap::<String, u32>::new();
let cells_builder = lib_builder.field_builder::<DListBuilder>(6).unwrap();
let mut layers = HashMap::<u32, u32>::new();
let cells_builder = lib_builder.field_builder::<DListBuilder>(5).unwrap();
let (mut input, mut header) = RecordHeader::read(input)?;
while header.tag != records::RTAG_ENDLIB {
@ -277,7 +277,7 @@ pub fn read_library(input: &[u8]) -> IResult<StructArray> {
let offset_builder = cell_builder.field_builder::<UInt64Builder>(1).unwrap();
offset_builder.append_value(position.try_into().unwrap());
(input, _) = read_elements(input, cell_builder, &mut names)?;
(input, _) = read_elements(input, cell_builder, &mut names, &mut layers)?;
cell_builder.append(true);
}
@ -286,13 +286,21 @@ pub fn read_library(input: &[u8]) -> IResult<StructArray> {
cells_builder.append(true);
let mut ids: HashMap<u32, String> = names.into_iter().map(|(kk, vv)| (vv, kk)).collect();
let names_builder = lib_builder.field_builder::<DListBuilder>(5).unwrap();
let names_builder = lib_builder.field_builder::<DListBuilder>(6).unwrap();
let name_builder = names_builder.values().as_any_mut().downcast_mut::<StringBuilder>().unwrap();
for id in 0..ids.len() {
name_builder.append_value(ids.remove(&id.try_into().unwrap()).unwrap());
}
names_builder.append(true);
let mut layer_ids: HashMap<u32, u32> = layers.into_iter().map(|(kk, vv)| (vv, kk)).collect();
let layers_builder = lib_builder.field_builder::<DListBuilder>(7).unwrap();
let layer_builder = layers_builder.values().as_any_mut().downcast_mut::<UInt32Builder>().unwrap();
for layer_id in 0..layer_ids.len() {
layer_builder.append_value(layer_ids.remove(&layer_id.try_into().unwrap()).unwrap());
}
layers_builder.append(true);
lib_builder.append(true);
let lib = lib_builder.finish();
Ok((input, lib))