From 4aa14c4914e42e4eaa31ead81f42b6b123ca15b2 Mon Sep 17 00:00:00 2001 From: jan Date: Fri, 11 Apr 2025 11:18:56 -0700 Subject: [PATCH] various cleanup and add some example interfaces --- Cargo.toml | 2 +- src/basic.rs | 12 +- src/elements.rs | 179 +------------------- src/iface.rs | 425 ++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 28 ++-- src/library.rs | 214 +++++++++++++++++++++++- src/misc.py | 40 +++++ src/record.rs | 44 ++--- 8 files changed, 721 insertions(+), 223 deletions(-) create mode 100644 src/iface.rs create mode 100644 src/misc.py diff --git a/Cargo.toml b/Cargo.toml index d878d23..c7deda4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,4 +12,4 @@ crate-type = ["cdylib", "rlib"] [dependencies] byteorder = "^1" -arrow = "*" +arrow = "^54" diff --git a/src/basic.rs b/src/basic.rs index 1019be1..47e2427 100644 --- a/src/basic.rs +++ b/src/basic.rs @@ -39,19 +39,19 @@ pub fn take_bytes>(input: &[u8], count: CC) -> IResult<&[u8]> { */ pub fn parse_u16(input: &[u8]) -> IResult { let (input, buf) = take_bytes(input, 2_usize)?; - let val = BigEndian::read_u16(&buf); + let val = BigEndian::read_u16(buf); Ok((input, val)) } pub fn parse_int2(input: &[u8]) -> IResult { let (input, buf) = take_bytes(input, 2_usize)?; - let val = BigEndian::read_i16(&buf); + let val = BigEndian::read_i16(buf); Ok((input, val)) } pub fn parse_int4(input: &[u8]) -> IResult { let (input, buf) = take_bytes(input, 4_usize)?; - let val = BigEndian::read_i32(&buf); + let val = BigEndian::read_i32(buf); Ok((input, val)) } @@ -69,7 +69,7 @@ pub fn decode_real8(int: u64) -> f64 { pub fn parse_real8(input: &[u8]) -> IResult { let (input, buf) = take_bytes(input, 8_usize)?; - let data = BigEndian::read_u64(&buf); + let data = BigEndian::read_u64(buf); Ok((input, decode_real8(data))) } @@ -77,8 +77,8 @@ pub fn parse_real8(input: &[u8]) -> IResult { pub fn parse_datetime(input: &[u8]) -> IResult<[i16; 6]> { let mut buf = [0_i16; 6]; let mut input = input; - for ii in 0..6 { - (input, buf[ii]) = parse_int2(input)?; + for bb in &mut buf { + (input, *bb) = parse_int2(input)?; } buf[0] += 1900; // Year is from 1900 Ok((input, buf)) diff --git a/src/elements.rs b/src/elements.rs index 479ea3b..3e41115 100644 --- a/src/elements.rs +++ b/src/elements.rs @@ -19,180 +19,13 @@ use std::string::String; use std::collections::HashMap; //use std::io::Write; -use std::sync::Arc; -use arrow::datatypes::{DataType, Field, Fields}; use arrow::array::{ StructBuilder, ListBuilder, StringBuilder, ArrayBuilder, Float64Builder, BooleanBuilder, - Int32Builder, Int16Builder, UInt64Builder, UInt32Builder, UInt8Builder, - StructArray, + Int32Builder, Int16Builder, UInt32Builder, UInt8Builder, }; -type DListBuilder = ListBuilder>; - - -pub fn read_library(input: &[u8]) -> IResult { - let input_size = input.len(); - - let property_t = DataType::Struct(Fields::from(vec![ - Field::new("key", DataType::Int16, false), - Field::new("value", DataType::Utf8, false), - ])); - - let property_list_t = DataType::List(Arc::new( - Field::new_list_field(property_t, false) - )); - - - let repetition_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("x0", DataType::Int32, false), - Field::new("y0", DataType::Int32, false), - Field::new("x1", DataType::Int32, false), - Field::new("y1", DataType::Int32, false), - Field::new("count0", DataType::Int16, false), - Field::new("count1", DataType::Int16, false), - ])); - - - let ref_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("target", DataType::UInt32, false), - Field::new("invert_y", DataType::Boolean, true), - Field::new("mag", DataType::Float64, true), - Field::new("angle_deg", DataType::Float64, true), - Field::new("x", DataType::Int32, false), - Field::new("y", DataType::Int32, false), - Field::new("repetition", repetition_struct_t, true), - Field::new("properties", property_list_t.clone(), true), - ])); - - - let text_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("layer", DataType::UInt16, false), - Field::new("dtype", DataType::UInt16, false), - Field::new("presentation_horiz", DataType::UInt8, true), - Field::new("presentation_vert", DataType::UInt8, true), - Field::new("presentation_font", DataType::UInt8, true), - Field::new("path_type", DataType::Int16, true), - Field::new("width", DataType::Int32, true), - Field::new("invert_y", DataType::Boolean, true), - Field::new("mag", DataType::Float64, true), - Field::new("angle_deg", DataType::Float64, true), - Field::new("x", DataType::Int32, false), - Field::new("y", DataType::Int32, false), - Field::new("string", DataType::Utf8, false), - Field::new("properties", property_list_t.clone(), true), - ])); - - - let coords_t = DataType::List(Arc::new( - Field::new_list_field(DataType::Int32, false) - )); - - let boundary_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("layer", DataType::UInt16, false), - Field::new("dtype", DataType::UInt16, false), - Field::new("xy", coords_t.clone(), false), - Field::new("properties", property_list_t.clone(), true), - ])); - - let path_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("layer", DataType::UInt16, false), - Field::new("dtype", DataType::UInt16, false), - Field::new("path_type", DataType::Int16, false), - Field::new("extension_start", DataType::Int32, true), - Field::new("extension_end", DataType::Int32, true), - Field::new("width", DataType::Int32, false), - Field::new("xy", coords_t.clone(), false), - Field::new("properties", property_list_t.clone(), true), - ])); - - let boxnode_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("layer", DataType::UInt16, false), - Field::new("dtype", DataType::UInt16, false), - Field::new("xy", coords_t.clone(), false), - Field::new("properties", property_list_t.clone(), true), - ])); - - - let ref_list_t = DataType::List(Arc::new( - Field::new_list_field(ref_struct_t, false) - )); - - let text_list_t = DataType::List(Arc::new( - Field::new_list_field(text_struct_t, false) - )); - - let boundary_list_t = DataType::List(Arc::new( - Field::new_list_field(boundary_struct_t, false) - )); - - let path_list_t = DataType::List(Arc::new( - Field::new_list_field(path_struct_t, false) - )); - - let boxnode_list_t = DataType::List(Arc::new( - Field::new_list_field(boxnode_struct_t, false) - )); - - - let cell_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("id", DataType::UInt32, false), - Field::new("file_offset", DataType::UInt64, false), - Field::new("refs", ref_list_t, false), - Field::new("boundaries", boundary_list_t, false), - Field::new("paths", path_list_t, false), - Field::new("nodes", boxnode_list_t.clone(), true), - Field::new("boxes", boxnode_list_t.clone(), true), - Field::new("texts", text_list_t, false), - ])); - - let mut lib_builder = StructBuilder::from_fields(vec![ - Field::new("cell_names", DataType::Utf8, false), - Field::new("cells", cell_struct_t, false), - ], - 0, - ); - - let cells_builder = lib_builder.field_builder::(0).unwrap(); - - - let mut names = HashMap::::new(); - - let (mut input, mut header) = RecordHeader::read(input)?; - while header.tag != records::RTAG_ENDLIB { - (input, _) = take_bytes(input, header.data_size)?; - if header.tag == records::RTAG_BGNSTR { - let name_bytes; - (input, name_bytes) = records::STRNAME::read(input)?; - let name = String::from_utf8(name_bytes).unwrap(); - - let next_id = names.len(); - let id = names.entry(name).or_insert(next_id.try_into().unwrap()); - let position = input_size - input.len(); - - let cell_builder = cells_builder.values().as_any_mut().downcast_mut::().unwrap(); - let id_builder = cell_builder.field_builder::(0).unwrap(); - id_builder.append_value(*id); - - let offset_builder = cell_builder.field_builder::(1).unwrap(); - offset_builder.append_value(position.try_into().unwrap()); - - (input, _) = read_elements(input, cell_builder, &mut names)?; - - cells_builder.append(true); - } - (input, header) = RecordHeader::read(input)?; - } - - let mut ids: HashMap = names.into_iter().map(|(kk, vv)| (vv, kk)).collect(); - let names_builder = lib_builder.field_builder::(1).unwrap(); - for id in 0..ids.len() { - names_builder.append_value(ids.remove(&id.try_into().unwrap()).unwrap()); - } - - let lib = lib_builder.finish(); - Ok((input, lib)) -} +pub type DListBuilder = ListBuilder>; pub fn read_elements<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, names: &mut HashMap) -> IResult<'a, ()> { @@ -266,7 +99,7 @@ pub fn read_path<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu let mut bgn_ext = None; let mut end_ext = None; - let (mut input, mut header) = RecordHeader::read(&input)?; + let (mut input, mut header) = RecordHeader::read(input)?; while header.tag != records::RTAG_XY { match header.tag { records::RTAG_PATHTYPE => { @@ -292,7 +125,7 @@ pub fn read_path<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu _ => return fail(input, format!("Unexpected tag {:04x}", header.tag)), }; - (input, header) = RecordHeader::read(&input)?; + (input, header) = RecordHeader::read(input)?; } let path_type_builder = path_builder.field_builder::(2).unwrap(); path_type_builder.append_option(path_type); @@ -827,7 +660,7 @@ impl Element for Path { let (input, layer) = LAYER::skip_and_read(input)?; let (input, dtype) = DATATYPE::read(input)?; - let (mut input, mut header) = RecordHeader::read(&input)?; + let (mut input, mut header) = RecordHeader::read(input)?; while header.tag != records::RTAG_XY { match header.tag { records::RTAG_PATHTYPE => @@ -841,7 +674,7 @@ impl Element for Path { _ => return fail(input, format!("Unexpected tag {:04x}", header.tag)), }; - (input, header) = RecordHeader::read(&input)?; + (input, header) = RecordHeader::read(input)?; } let (input, xy) = XY::read_data(input, header.data_size)?; let (input, properties) = read_properties(input)?; diff --git a/src/iface.rs b/src/iface.rs new file mode 100644 index 0000000..6e88dce --- /dev/null +++ b/src/iface.rs @@ -0,0 +1,425 @@ +/* + * Shapes: + * layer, dtype (2x i16) + * bounds (4x i32) + * offset/ptr (usize) + * cell_id (usize) + * + * Cell: + * name (??) + * offset (usize) + * len (usize) + * + * Refs: + * name_ind (usize) + * invert (bool) + * mag (f64) + * angle (f64) + * offset/ptr (usize) + * + */ + + +use arrow; +use arrow::array::{LargeStringArray, DictionaryArray, UInt64Array, Int32Array, UInt16Array} +use arrow::builder::{LargeStringDictionaryBuilder, UInt64Builder, Int32Builder, Int64Builder, BooleanBuilder}; +use library::FileHeader; + +use std::collections::HashMap; + +pub use crate::record; +pub use crate::record::{RecordHeader, Record}; +pub use crate::records; +pub use crate::elements; +pub use crate::elements::{Element}; +pub use crate::basic::{IResult, OResult, take_bytes, fail}; + + + +const DEFAULT_DATE: [i16; 6] = [1900, 0, 0, 0, 0, 0]; + +#[derive(Debug, Clone)] +pub struct LibraryBuilder { + cell_name: LargeStringDictionaryBuilder, + cell_offset: UInt64Builder, + + ref_parent: UInt64Builder, + ref_name: LargeStringDictionaryBuilder, + ref_mirror: BooleanBuilder, + ref_mag: Float64Builder, + ref_angle: Float64Builder, + ref_offset: UInt64Builder, + + shape_parent: UInt64Builder, + shape_offset: UInt64Builder, + shape_type: UInt8Builder, + shape_layer: UInt16Builder, + shape_dtype: UInt16Builder, + shape_xmin: Int32Builder, + shape_ymin: Int32Builder, + shape_xmax: Int32Builder, + shape_ymax: Int32Builder, + shape_data: LargeListArrayBuilder, + + text_parent: UInt64Builder, + text_string: LargeStringDictionaryBuilder, + text_x: Int32Builder, + text_y: Int32Builder, + + + + boundaries: Vec, + paths: Vec, + nodes: Vec, + boxes: Vec, + texts: Vec, + refs: Vec, +} + +impl Cell { + /// Build an empty cell + pub fn new(name: Vec) -> Self { + Cell{ + name: name, + boundaries: Vec::new(), + paths: Vec::new(), + nodes: Vec::new(), + boxes: Vec::new(), + texts: Vec::new(), + refs: Vec::new(), + } + } + + /// Skip to the next structure and attempt to read it. + /// + /// Args: + /// input: Seekable input to read from. + /// + /// Returns: + /// (name, elements) if a structure was found. + /// None if no structure was found before the end of the library. + /// + pub fn read(input: &[u8]) -> IResult> { + let (input, success) = records::BGNSTR::skip_past(input)?; + if !success { + return Ok((input, None)) + } + + let (input, name) = records::STRNAME::read(input)?; + let mut cell = Cell::new(name); + let (input, _) = cell.read_elements(input)?; + Ok((input, Some(cell))) + } + + /// Read elements from the input until an ENDSTR + /// record is encountered. The ENDSTR record is also + /// consumed. + /// + /// Args: + /// input: Seekable input to read from. + /// + /// Returns: + /// List of element objects. + /// + pub fn read_elements<'a>(&mut self, input: &'a [u8]) -> IResult<'a, ()> { + let (mut input, mut header) = RecordHeader::read(input)?; + while header.tag != records::RTAG_ENDSTR { + match header.tag { + records::RTAG_BOUNDARY => { + let boundary; + (input, _) = records::BOUNDARY::read(input)?; + (input, boundary) = elements::Boundary::read(input)?; + self.boundaries.push(boundary); + }, + records::RTAG_PATH => { + let path; + (input, _) = records::PATH::read(input)?; + (input, path) = elements::Path::read(input)?; + self.paths.push(path); + }, + records::RTAG_NODE => { + let node; + (input, _) = records::NODE::read(input)?; + (input, node) = elements::Node::read(input)?; + self.nodes.push(node); + }, + records::RTAG_BOX => { + let gds_box; + (input, _) = records::BOX::read(input)?; + (input, gds_box) = elements::GDSBox::read(input)?; + self.boxes.push(gds_box); + }, + records::RTAG_TEXT => { + let txt; + (input, _) = records::TEXT::read(input)?; + (input, txt) = elements::Text::read(input)?; + self.texts.push(txt); + }, + records::RTAG_SREF => { + let sref; + (input, _) = records::SREF::read(input)?; + (input, sref) = elements::Reference::read(input)?; + self.refs.push(sref); + }, + records::RTAG_AREF => { + let aref; + (input, _) = records::AREF::read(input)?; + (input, aref) = elements::Reference::read(input)?; + self.refs.push(aref); + }, + _ => { + // don't care, skip + (input, _) = take_bytes(input, header.data_size)?; + } + } + (input, header) = RecordHeader::read(input)?; + } + Ok((input, ())) + } + + /// + /// Write a structure to the provided input. + /// + /// Args: + /// name: Structure name (ascii-encoded). + /// elements: List of Elements containing the geometry and text in this struct. + /// cre_time: Creation time (optional). + /// mod_time: Modification time (optional). + /// + /// Return: + /// Number of bytes written + /// + pub fn write( + &self, + ww: &mut W, + cre_time: Option<[i16; 6]>, + mod_time: Option<[i16; 6]>, + ) -> OResult { + let mut size = 0; + size += records::BGNSTR::write(ww, &[cre_time.unwrap_or(DEFAULT_DATE), + mod_time.unwrap_or(DEFAULT_DATE)])?; + size += records::STRNAME::write(ww, &self.name)?; + size += self.write_elements(ww)?; + size += records::ENDSTR::write(ww, &())?; + Ok(size) + } + + pub fn write_elements(&self, ww: &mut W) -> OResult { + let mut size = 0; + for boundary in &self.boundaries { + size += boundary.write(ww)?; + } + for path in &self.paths { + size += path.write(ww)?; + } + for node in &self.nodes { + size += node.write(ww)?; + } + for gds_box in &self.boxes { + size += gds_box.write(ww)?; + } + for text in &self.texts { + size += text.write(ww)?; + } + for reference in &self.refs { + size += reference.write(ww)?; + } + Ok(size) + } +} + + + +pub fn make_table(input: &[u8]) -> IResult, usize>> { + let input_size = input.len(); + let mut names = LargeStringDictionaryBuilder::new(); + let mut positions = UInt64Builder::new(); + + let (mut input, mut header) = RecordHeader::read(input)?; + while header.tag != records::RTAG_ENDLIB { + (input, let record_bytes) = take_bytes(input, header.data_size)?; + if header.tag == records::RTAG_BGNSTR { + let position = input_size - input.len(); + + let name; + (input, name) = records::STRNAME::read(record_bytes)?; + + names.append(name); + positions.append(position); + + + pub fn read_elements<'a>(&mut self, input: &'a [u8]) -> IResult<'a, ()> { + let (mut input, mut header) = RecordHeader::read(input)?; + while header.tag != records::RTAG_ENDSTR { + match header.tag { + records::RTAG_BOUNDARY => { + let boundary; + (input, _) = records::BOUNDARY::read(input)?; + (input, boundary) = elements::Boundary::read(input)?; + self.boundaries.push(boundary); + }, + records::RTAG_PATH => { + let path; + (input, _) = records::PATH::read(input)?; + (input, path) = elements::Path::read(input)?; + self.paths.push(path); + }, + records::RTAG_NODE => { + let node; + (input, _) = records::NODE::read(input)?; + (input, node) = elements::Node::read(input)?; + self.nodes.push(node); + }, + records::RTAG_BOX => { + let gds_box; + (input, _) = records::BOX::read(input)?; + (input, gds_box) = elements::GDSBox::read(input)?; + self.boxes.push(gds_box); + }, + records::RTAG_TEXT => { + let txt; + (input, _) = records::TEXT::read(input)?; + (input, txt) = elements::Text::read(input)?; + self.texts.push(txt); + }, + records::RTAG_SREF => { + let sref; + (input, _) = records::SREF::read(input)?; + (input, sref) = elements::Reference::read(input)?; + self.refs.push(sref); + }, + records::RTAG_AREF => { + let aref; + (input, _) = records::AREF::read(input)?; + (input, aref) = elements::Reference::read(input)?; + self.refs.push(aref); + }, + _ => { + // don't care, skip + (input, _) = take_bytes(input, header.data_size)?; + } + } + (input, header) = RecordHeader::read(input)?; + } + Ok((input, ())) + } + + + + } + (input, header) = RecordHeader::read(input)?; + } + Ok((input, positions)) +} + + + + +pub struct References { + invert_y: Vec, + mag_and_angle_deg: Vec, + xy: Vec, + + names: Vec, + name_inds: Vec, + + has_prop: Vec, + has_rep: Vec, +} + +pub struct Repetitions { + colrow_vecs: Vec, // 4 per + colrow_counts: Vec, // 2 per +} + +pub struct Shapes { + layers_and_dtypes: Vec, // 2 per + xy: Vec, // variable per + has_prop: Vec, +} + + + +pub struct References { + invert_y: bool, + mag: f64, + angle_deg: f64, + + xy: (i32, i32), + + // Use id to look up these... maybe include has_props and has_rep? + struct_name: Vec, + properties: HashMap::>, + + col_vec: (i32 i32), + row_vec: (i32, i32), + colrow: (i16, i16), +} + +pub struct Boundary { + /// (layer, data_type) tuple + layer: (i16, i16), + /// Ordered vertices of the shape. First and last points should be identical. Order x0, y0, x1,... + xy: Vec, + /// Properties for the element. + properties: HashMap::>, +} + +pub struct Path { + /// (layer, data_type) tuple + layer: (i16, i16), + /// End-cap type (0: flush, 1: circle, 2: square, 4: custom) + path_type: i16, + /// Path width + width: i32, + /// Extension when using path_type=4. Ignored otherwise. + extension: (i32, i32), + /// Path centerline coordinates. [x0, y0, x1, y1,...] + xy: Vec, + /// Properties for the element. + properties: HashMap::>, +} + +pub struct GDSBox { + /// (layer, box_type) tuple + layer: (i16, i16), + /// Box coordinates (5 pairs) + xy: Vec, + /// Properties for the element. + properties: HashMap::>, +} +pub struct Node { + /// (layer, box_type) tuple + layer: (i16, i16), + /// 1-50 pairs of coordinates. + xy: Vec, + /// Properties for the element. + properties: HashMap::>, +} +pub struct Text { + /// (layer, node_type) tuple + layer: (i16, i16), + + /// Bit array. Default all zeros. + /// bits 0-1: 00 left/01 center/10 right + /// bits 2-3: 00 top/01 middle/10 bottom + /// bits 4-5: font number + presentation: [bool; 16], + + /// Default 0 + path_type: i16, + /// Default 0 + width: i32, + /// Vertical inversion. Default false. + invert_y: bool, + /// Scaling factor. Default 1. + mag: f64, + /// Rotation (ccw). Default 0. + angle_deg: f64, + /// Position (1 pair only) + xy: Vec, + /// Text content + string: Vec, + /// Properties for the element. + properties: HashMap::> +} diff --git a/src/lib.rs b/src/lib.rs index cf8365c..0f8c365 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -48,39 +48,39 @@ macro_rules! impl_i32be { #[no_mangle] -pub extern "C" fn f64_to_i16(arr: *mut f64, size: usize) -> f64 { impl_i16be!(f64, arr, size) } +pub unsafe extern "C" fn f64_to_i16(arr: *mut f64, size: usize) -> f64 { impl_i16be!(f64, arr, size) } #[no_mangle] -pub extern "C" fn f64_to_i32(arr: *mut f64, size: usize) -> f64 { impl_i32be!(f64, arr, size) } +pub unsafe extern "C" fn f64_to_i32(arr: *mut f64, size: usize) -> f64 { impl_i32be!(f64, arr, size) } #[no_mangle] -pub extern "C" fn f32_to_i16(arr: *mut f32, size: usize) -> f32 { impl_i16be!(f32, arr, size) } +pub unsafe extern "C" fn f32_to_i16(arr: *mut f32, size: usize) -> f32 { impl_i16be!(f32, arr, size) } #[no_mangle] -pub extern "C" fn f32_to_i32(arr: *mut f32, size: usize) -> f32 { impl_i32be!(f32, arr, size) } +pub unsafe extern "C" fn f32_to_i32(arr: *mut f32, size: usize) -> f32 { impl_i32be!(f32, arr, size) } #[no_mangle] -pub extern "C" fn u64_to_i16(arr: *mut u64, size: usize) -> u64 { impl_i16be!(u64, arr, size) } +pub unsafe extern "C" fn u64_to_i16(arr: *mut u64, size: usize) -> u64 { impl_i16be!(u64, arr, size) } #[no_mangle] -pub extern "C" fn u64_to_i32(arr: *mut u64, size: usize) -> u64 { impl_i32be!(u64, arr, size) } +pub unsafe extern "C" fn u64_to_i32(arr: *mut u64, size: usize) -> u64 { impl_i32be!(u64, arr, size) } #[no_mangle] -pub extern "C" fn i64_to_i16(arr: *mut i64, size: usize) -> i64 { impl_i16be!(i64, arr, size) } +pub unsafe extern "C" fn i64_to_i16(arr: *mut i64, size: usize) -> i64 { impl_i16be!(i64, arr, size) } #[no_mangle] -pub extern "C" fn i64_to_i32(arr: *mut i64, size: usize) -> i64 { impl_i32be!(i64, arr, size) } +pub unsafe extern "C" fn i64_to_i32(arr: *mut i64, size: usize) -> i64 { impl_i32be!(i64, arr, size) } #[no_mangle] -pub extern "C" fn u32_to_i16(arr: *mut u32, size: usize) -> u32 { impl_i16be!(u32, arr, size) } +pub unsafe extern "C" fn u32_to_i16(arr: *mut u32, size: usize) -> u32 { impl_i16be!(u32, arr, size) } #[no_mangle] -pub extern "C" fn u32_to_i32(arr: *mut u32, size: usize) -> u32 { impl_i32be!(u32, arr, size) } +pub unsafe extern "C" fn u32_to_i32(arr: *mut u32, size: usize) -> u32 { impl_i32be!(u32, arr, size) } #[no_mangle] -pub extern "C" fn i32_to_i16(arr: *mut i32, size: usize) -> i32 { impl_i16be!(i32, arr, size) } +pub unsafe extern "C" fn i32_to_i16(arr: *mut i32, size: usize) -> i32 { impl_i16be!(i32, arr, size) } #[no_mangle] -pub extern "C" fn i32_to_i32(arr: *mut i32, size: usize) -> i32 { impl_i32be!(i32, arr, size) } +pub unsafe extern "C" fn i32_to_i32(arr: *mut i32, size: usize) -> i32 { impl_i32be!(i32, arr, size) } #[no_mangle] -pub extern "C" fn u16_to_i16(arr: *mut u16, size: usize) -> u16 { impl_i16be!(u16, arr, size) } +pub unsafe extern "C" fn u16_to_i16(arr: *mut u16, size: usize) -> u16 { impl_i16be!(u16, arr, size) } #[no_mangle] -pub extern "C" fn i16_to_i16(arr: *mut i16, size: usize) -> i16 { impl_i16be!(i16, arr, size) } +pub unsafe extern "C" fn i16_to_i16(arr: *mut i16, size: usize) -> i16 { impl_i16be!(i16, arr, size) } diff --git a/src/library.rs b/src/library.rs index ee392ed..bad6229 100644 --- a/src/library.rs +++ b/src/library.rs @@ -9,11 +9,18 @@ pub use crate::record; pub use crate::record::{RecordHeader, Record}; pub use crate::records; pub use crate::elements; -//pub use crate::elements::{Element}; +pub use crate::elements::{read_elements, DListBuilder}; pub use crate::basic::{IResult, OResult, take_bytes, fail}; +use std::string::String; +use std::collections::HashMap; +use std::sync::Arc; -const DEFAULT_DATE: [i16; 6] = [1900, 0, 0, 0, 0, 0]; +use arrow::datatypes::{DataType, Field, Fields}; +use arrow::array::{ + StructBuilder, StringBuilder, UInt64Builder, UInt32Builder, Int16Builder, Float64Builder, + FixedSizeListBuilder, StructArray, +}; /// @@ -44,8 +51,8 @@ impl FileHeader { mod_time: [0, 1, 1, 0, 0, 0], acc_time: [0, 1, 1, 0, 0, 0], name: name.to_owned(), - user_units_per_db_unit: user_units_per_db_unit, - meters_per_db_unit: meters_per_db_unit, + user_units_per_db_unit, + meters_per_db_unit, } } @@ -64,9 +71,9 @@ impl FileHeader { let (input, (uu, dbu)) = records::UNITS::skip_and_read(input)?; Ok((input, FileHeader{ - mod_time: mod_time, - acc_time: acc_time, - name: name, + mod_time, + acc_time, + name, user_units_per_db_unit: uu, meters_per_db_unit: dbu, })) @@ -90,6 +97,199 @@ impl FileHeader { } } + +pub fn read_library(input: &[u8]) -> IResult { + let input_size = input.len(); + + let property_t = DataType::Struct(Fields::from(vec![ + Field::new("key", DataType::Int16, false), + Field::new("value", DataType::Utf8, false), + ])); + + let property_list_t = DataType::List(Arc::new( + Field::new_list_field(property_t, false) + )); + + + let repetition_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("x0", DataType::Int32, false), + Field::new("y0", DataType::Int32, false), + Field::new("x1", DataType::Int32, false), + Field::new("y1", DataType::Int32, false), + Field::new("count0", DataType::Int16, false), + Field::new("count1", DataType::Int16, false), + ])); + + + let ref_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("target", DataType::UInt32, false), + Field::new("invert_y", DataType::Boolean, true), + Field::new("mag", DataType::Float64, true), + Field::new("angle_deg", DataType::Float64, true), + Field::new("x", DataType::Int32, false), + Field::new("y", DataType::Int32, false), + Field::new("repetition", repetition_struct_t, true), + Field::new("properties", property_list_t.clone(), true), + ])); + + + let text_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("layer", DataType::UInt16, false), + Field::new("dtype", DataType::UInt16, false), + Field::new("presentation_horiz", DataType::UInt8, true), + Field::new("presentation_vert", DataType::UInt8, true), + Field::new("presentation_font", DataType::UInt8, true), + Field::new("path_type", DataType::Int16, true), + Field::new("width", DataType::Int32, true), + Field::new("invert_y", DataType::Boolean, true), + Field::new("mag", DataType::Float64, true), + Field::new("angle_deg", DataType::Float64, true), + Field::new("x", DataType::Int32, false), + Field::new("y", DataType::Int32, false), + Field::new("string", DataType::Utf8, false), + Field::new("properties", property_list_t.clone(), true), + ])); + + + let coords_t = DataType::List(Arc::new( + Field::new_list_field(DataType::Int32, false) + )); + + let boundary_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("layer", DataType::UInt16, false), + Field::new("dtype", DataType::UInt16, false), + Field::new("xy", coords_t.clone(), false), + Field::new("properties", property_list_t.clone(), true), + ])); + + let path_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("layer", DataType::UInt16, false), + Field::new("dtype", DataType::UInt16, false), + Field::new("path_type", DataType::Int16, false), + Field::new("extension_start", DataType::Int32, true), + Field::new("extension_end", DataType::Int32, true), + Field::new("width", DataType::Int32, false), + Field::new("xy", coords_t.clone(), false), + Field::new("properties", property_list_t.clone(), true), + ])); + + let boxnode_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("layer", DataType::UInt16, false), + Field::new("dtype", DataType::UInt16, false), + Field::new("xy", coords_t.clone(), false), + Field::new("properties", property_list_t.clone(), true), + ])); + + + let ref_list_t = DataType::List(Arc::new( + Field::new_list_field(ref_struct_t, false) + )); + + let text_list_t = DataType::List(Arc::new( + Field::new_list_field(text_struct_t, false) + )); + + let boundary_list_t = DataType::List(Arc::new( + Field::new_list_field(boundary_struct_t, false) + )); + + let path_list_t = DataType::List(Arc::new( + Field::new_list_field(path_struct_t, false) + )); + + let boxnode_list_t = DataType::List(Arc::new( + Field::new_list_field(boxnode_struct_t, false) + )); + + let name_list_t = DataType::List(Arc::new( + Field::new_list_field(DataType::Utf8, false) + )); + + let time_t = DataType::FixedSizeList(Arc::new( + Field::new_list_field(DataType::Int16, false), + ), + 6, + ); + + let cell_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("id", DataType::UInt32, false), + Field::new("file_offset", DataType::UInt64, false), + Field::new("refs", ref_list_t, false), + Field::new("boundaries", boundary_list_t, false), + Field::new("paths", path_list_t, false), + Field::new("nodes", boxnode_list_t.clone(), true), + Field::new("boxes", boxnode_list_t.clone(), true), + Field::new("texts", text_list_t, false), + ])); + + let mut lib_builder = StructBuilder::from_fields(vec![ + Field::new("meters_per_db_unit", DataType::Float64, false), + Field::new("user_units_per_db_unit", DataType::Float64, false), + Field::new("lib_name", DataType::Utf8, false), + Field::new("mod_time", time_t.clone(), false), + Field::new("acc_time", time_t.clone(), false), + Field::new("cell_names", name_list_t, false), + Field::new("cells", cell_struct_t, false), + ], + 0, + ); + + let (input, header) = FileHeader::read(input)?; + let dbu_builder = lib_builder.field_builder::(0).unwrap(); + dbu_builder.append_value(header.meters_per_db_unit); + let uu_builder = lib_builder.field_builder::(1).unwrap(); + uu_builder.append_value(header.user_units_per_db_unit); + let libname_builder = lib_builder.field_builder::(2).unwrap(); + libname_builder.append_value(String::from_utf8(header.name).unwrap()); + let mt_builder = lib_builder.field_builder::>(3).unwrap(); + mt_builder.values().append_values(&header.mod_time, &[true; 6]); + let at_builder = lib_builder.field_builder::>(4).unwrap(); + at_builder.values().append_values(&header.acc_time, &[true; 6]); + + + + let cells_builder = lib_builder.field_builder::(5).unwrap(); + + + let mut names = HashMap::::new(); + + let (mut input, mut header) = RecordHeader::read(input)?; + while header.tag != records::RTAG_ENDLIB { + (input, _) = take_bytes(input, header.data_size)?; + if header.tag == records::RTAG_BGNSTR { + let name_bytes; + (input, name_bytes) = records::STRNAME::read(input)?; + let name = String::from_utf8(name_bytes).unwrap(); + + let next_id = names.len(); + let id = names.entry(name).or_insert(next_id.try_into().unwrap()); + let position = input_size - input.len(); + + let cell_builder = cells_builder.values().as_any_mut().downcast_mut::().unwrap(); + let id_builder = cell_builder.field_builder::(0).unwrap(); + id_builder.append_value(*id); + + let offset_builder = cell_builder.field_builder::(1).unwrap(); + offset_builder.append_value(position.try_into().unwrap()); + + (input, _) = read_elements(input, cell_builder, &mut names)?; + + cells_builder.append(true); + } + (input, header) = RecordHeader::read(input)?; + } + + let mut ids: HashMap = names.into_iter().map(|(kk, vv)| (vv, kk)).collect(); + let names_builder = lib_builder.field_builder::(6).unwrap(); + for id in 0..ids.len() { + names_builder.append_value(ids.remove(&id.try_into().unwrap()).unwrap()); + } + + lib_builder.append(true); + let lib = lib_builder.finish(); + Ok((input, lib)) +} + /* /// /// Scan through a GDS file, building a table of diff --git a/src/misc.py b/src/misc.py new file mode 100644 index 0000000..ab4c9bf --- /dev/null +++ b/src/misc.py @@ -0,0 +1,40 @@ +''' +https://github.com/apache/arrow/blob/main/python/pyarrow/tests/test_cffi.py +''' +# -*- coding: utf-8 -*- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import contextlib +import ctypes +import pyarrow as pa +from pyarrow.cffi import ffi + + c_schema = ffi.new('struct ArrowSchema*') + ptr_schema = int(ffi.cast('uintptr_t', c_schema)) + c_array = ffi.new('struct ArrowArray*') + ptr_array = int(ffi.cast('uintptr_t', c_array)) + + # pyarrow.Array._import_from_c(ptr_array, pa.list_(pa.int32())) + + # import gc + # gc.collect() # Make sure no Arrow data dangles in a ref cycle + # pyarrow.Array._export_from_c(arr, ptr_array, ptr_schema) + + arr_new = pyarrow.Array._import_from_c(ptr_array, ptr_schema) + + diff --git a/src/record.rs b/src/record.rs index da2e12b..8d106c9 100644 --- a/src/record.rs +++ b/src/record.rs @@ -23,10 +23,10 @@ pub struct RecordHeader { } impl RecordHeader { - pub fn read<'a>(input: &[u8]) -> IResult { + pub fn read(input: &[u8]) -> IResult { let (input, size) = parse_u16(input)?; let (input, tag) = parse_u16(input)?; - Ok((input, RecordHeader{tag:tag, data_size:size - 4})) + Ok((input, RecordHeader{tag, data_size:size - 4})) } pub fn pack_into(&self) -> [u8; 4] { @@ -49,7 +49,7 @@ pub trait RecordData { type InData : ?Sized; type ByteData : AsRef<[u8]>; - fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>>; + fn read(input: &[u8], size: u16) -> IResult>; fn pack_into(buf: &mut [u8], data: &Self::InData); //fn size(data: &Self::BareData<'_>) -> u16; fn pack(data: &Self::InData) -> Self::ByteData; @@ -76,7 +76,7 @@ pub trait Record { } fn write_header(ww: &mut W, data_size: u16) -> OResult { - RecordHeader{tag: Self::tag(), data_size: data_size}.write(ww) + RecordHeader{tag: Self::tag(), data_size}.write(ww) } fn read_data(input: &[u8], size: u16) -> IResult> { @@ -127,7 +127,7 @@ pub trait Record { } } - fn read<'a>(input: &'a [u8]) -> IResult<'a, RData::BareData<'a>> { + fn read(input: &[u8]) -> IResult> { let (input, size) = Self::expect_header(input)?; Self::check_size(size).unwrap(); let (input, data) = Self::read_data(input, size)?; @@ -151,7 +151,7 @@ impl RecordData for BitArray { type InData = [bool; 16]; type ByteData = [u8; 2]; - fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { + fn read(input: &[u8], size: u16) -> IResult> { assert!(size == 2); parse_bitarray(input) } @@ -174,7 +174,7 @@ impl RecordData for Int2 { type InData = i16; type ByteData = [u8; 2]; - fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { + fn read(input: &[u8], size: u16) -> IResult> { assert!(size == 2); parse_int2(input) } @@ -196,7 +196,7 @@ impl RecordData for Int4 { type InData = i32; type ByteData = [u8; 4]; - fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { + fn read(input: &[u8], size: u16) -> IResult> { assert!(size == 4); parse_int4(input) } @@ -219,15 +219,15 @@ impl RecordData for Int2Array { type InData = [i16]; type ByteData = Vec; - fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { + fn read(input: &[u8], size: u16) -> IResult> { assert!(size % 2 == 0, "Record must contain an integer quantity of integers"); //let mut input = input; let (input, bytes) = take_bytes(input, size)?; - Ok((input, Int2ArrayReader{bytes: bytes})) + Ok((input, Int2ArrayReader{bytes})) } fn pack_into(buf: &mut [u8], data: &Self::InData) { - BigEndian::write_i16_into(&data, buf) + BigEndian::write_i16_into(data, buf) } fn pack(data: &Self::InData) -> Self::ByteData { @@ -260,15 +260,15 @@ impl RecordData for Int4Array { type InData = [i32]; type ByteData = Vec; - fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { + fn read(input: &[u8], size: u16) -> IResult> { assert!(size % 4 == 0, "Record must contain an integer quantity of integers"); //let mut input = input; let (input, bytes) = take_bytes(input, size)?; - Ok((input, Int4ArrayReader{bytes: bytes})) + Ok((input, Int4ArrayReader{bytes})) } fn pack_into(buf: &mut [u8], data: &Self::InData) { - BigEndian::write_i32_into(&data, buf) + BigEndian::write_i32_into(data, buf) } fn pack(data: &Self::InData) -> Self::ByteData { @@ -301,13 +301,13 @@ impl RecordData for Real8 { type InData = f64; type ByteData = [u8; 8]; - fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { + fn read(input: &[u8], size: u16) -> IResult> { assert!(size == 8); parse_real8(input) } fn pack_into(buf: &mut [u8], data: &Self::InData) { - pack_real8(buf, *data).expect(&format!("Float {0} too big for Real8", data)) + pack_real8(buf, *data).unwrap_or_else(|_| panic!("Float {0} too big for Real8", data)) } fn pack(data: &Self::InData) -> Self::ByteData { @@ -323,7 +323,7 @@ impl RecordData for Real8Pair { type InData = (f64, f64); type ByteData = [u8; 2 * 8]; - fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { + fn read(input: &[u8], size: u16) -> IResult> { assert!(size == 2 * 8); let (input, data0) = parse_real8(input)?; let (input, data1) = parse_real8(input)?; @@ -331,8 +331,8 @@ impl RecordData for Real8Pair { } fn pack_into(buf: &mut [u8], data: &Self::InData) { - pack_real8(&mut buf[8 * 0..], data.0).expect(&format!("Float.0 {0} too big for Real8", data.0)); - pack_real8(&mut buf[8 * 1..], data.1).expect(&format!("Float.1 {0} too big for Real8", data.1)); + pack_real8(&mut buf[8 * 0..], data.0).unwrap_or_else(|_| panic!("Float.0 {0} too big for Real8", data.0)); + pack_real8(&mut buf[8 * 1..], data.1).unwrap_or_else(|_| panic!("Float.1 {0} too big for Real8", data.1)); } fn pack(data: &Self::InData) -> Self::ByteData { @@ -353,7 +353,7 @@ impl RecordData for ASCII { type InData = [u8]; type ByteData = Vec; - fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { + fn read(input: &[u8], size: u16) -> IResult> { parse_ascii(input, size) } @@ -375,7 +375,7 @@ impl RecordData for DateTimePair { type InData = [[i16; 6]; 2]; type ByteData = [u8; 2 * 6 * 2]; - fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { + fn read(input: &[u8], size: u16) -> IResult> { assert!(size == 2 * 6 * 2); let (input, data0) = parse_datetime(input)?; let (input, data1) = parse_datetime(input)?; @@ -405,7 +405,7 @@ impl RecordData for Empty { type InData = (); type ByteData = [u8; 0]; - fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { + fn read(input: &[u8], size: u16) -> IResult> { assert!(size == 0); Ok((input, ())) }