From 48db47217afffd8f489c21dd9e01f57b1df65978 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Sat, 18 Dec 2021 21:05:00 -0800 Subject: [PATCH 01/31] snapshot 2021-12-18 21:05:00.635887 --- .gitignore | 6 + Cargo.toml | 8 + src/__init__.rs | 29 +++ src/basic.rs | 198 +++++++++++++++ src/elements.rs | 481 +++++++++++++++++++++++++++++++++++++ src/lib.rs | 7 + src/library.rs | 282 ++++++++++++++++++++++ src/record.rs | 219 +++++++++++++++++ src/records.rs | 595 ++++++++++++++++++++++++++++++++++++++++++++++ src/test_basic.rs | 121 ++++++++++ 10 files changed, 1946 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 src/__init__.rs create mode 100644 src/basic.rs create mode 100644 src/elements.rs create mode 100644 src/lib.rs create mode 100644 src/library.rs create mode 100644 src/record.rs create mode 100644 src/records.rs create mode 100644 src/test_basic.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5d80173 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +/target/ +**/*.rs.bk +Cargo.lock + +*.swp +*.swo diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..414196a --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "rs-klamath" +version = "0.1.0" +authors = ["jan "] + +[dependencies] +byteorder = "^1" +nom = "^7" diff --git a/src/__init__.rs b/src/__init__.rs new file mode 100644 index 0000000..44d6c6b --- /dev/null +++ b/src/__init__.rs @@ -0,0 +1,29 @@ +/* + * `klamath` is a Python module for reading and writing to the GDSII file format. + * + * The goal is to keep this library simple: + * - Map data types directly wherever possible. + * * Presents an accurate representation of what is saved to the file. + * * Avoids excess copies / allocations for speed. + * * No "automatic" error checking, except when casting datatypes. + * If data integrity checks are provided at all, they must be + * explicitly run by the caller. 
+ * - Low-level functionality is first-class. + * * Meant for use-cases where the caller wants to read or write + * individual GDS records. + * * Offers complete control over the written file. + * - Opinionated and limited high-level functionality. + * * Discards or ignores rarely-encountered data types. + * * Keeps functions simple and reusable. + * * Only de/encodes the file format, doesn't provide tools to modify + * the data itself. + * * Still requires explicit values for most fields. + * - No compilation + * * Uses `numpy` for speed, since it's commonly available / pre-built. + * * Building this library should not require a compiler. + * + * `klamath` was built to provide a fast and versatile GDS interface for + * [masque](https://mpxd.net/code/jan/masque), which provides higher-level + * tools for working with hierarchical design data and supports multiple + * file formats. + */ diff --git a/src/basic.rs b/src/basic.rs new file mode 100644 index 0000000..0cadc0d --- /dev/null +++ b/src/basic.rs @@ -0,0 +1,198 @@ +/* + * Functionality for parsing and writing basic data types + */ +use nom; +use nom::{IResult}; +use byteorder::BigEndian; +//use std::io::Write; +use std::io; + + +pub type OWResult = Result; + +/* + * Parse functions + */ +//pub fn parse_byte_as_bits(input: &[u8]) -> IResult<&[u8], (u8, u8, u8, u8, u8, u8, u8, u8)> { +// nom::bits::bits(nom::sequence::tuple(( +// nom::bits::complete::take::<_, _, _, CustomError<_>>(1_usize), +// nom::bits::complete::take::<_, _, _, CustomError<_>>(1_usize), +// nom::bits::complete::take::<_, _, _, CustomError<_>>(1_usize), +// nom::bits::complete::take::<_, _, _, CustomError<_>>(1_usize), +// nom::bits::complete::take::<_, _, _, CustomError<_>>(1_usize), +// nom::bits::complete::take::<_, _, _, CustomError<_>>(1_usize), +// nom::bits::complete::take::<_, _, _, CustomError<_>>(1_usize), +// nom::bits::complete::take::<_, _, _, CustomError<_>>(1_usize), +// )))(input) +//} + + +pub fn parse_int2(input: &[u8]) -> 
IResult<&[u8], i16> { + nom::number::streaming::be_i16(input)? +} + +pub fn parse_int4(input: &[u8]) -> IResult<&[u8], i32> { + nom::number::streaming::be_i32(input)? +} + +pub fn decode_real8(int: u64) -> f64 { + // Convert GDS REAL8 to IEEE float64 + let neg = int & 0x8000_0000_0000_0000; + let exp = (int >> 56) & 0x7f; + let mut mant = (int & 0x00ff_ffff_ffff_ffff) as f64; + if neg != 0 { + mant *= -1 + } + mant * 2_f64.powi(4 * (exp - 64) - 56) +} + +pub fn parse_real8(input: &[u8]) -> IResult<&[u8], f64> { + let data = nom::number::streaming::be_u64(input)?; + IResult::Ok(decode_real8(data)) +} + + +pub fn parse_datetime(input: &[u8]) -> IResult<&[u8], [u16; 6]> { + let mut buf = [0_u16; 6]; + let mut parts = nom::multi::fill(parse_int2, &mut buf)(input); + parts[0] += 1900; // Year is from 1900 + IResult::Ok(parts) +} + + +pub fn parse_bitarray(input: &[u8]) -> IResult<&[u8], [bool; 16]> { + let bits = [false; 16]; + let (input, val) = parse_int2(input)?; + for ii in 0..16 { + bits[ii] = ((val >> (16 - 1 - ii)) & 0x01) == 1; + } + bits +} + + +pub fn parse_ascii(input: &[u8], length: usize) -> IResult<&[u8], Vec> { + let last = input[length - 1]; + let true_length = if last == '\0' { length - 1 } else { length }; + let vec = input[..true_length].to_vec(); + IResult::Ok((input[length..], vec)) +} + + +/* + * Pack functions + */ + +pub fn pack_bitarray(bits: &[bool; 16]) -> u16 { + let mut int: u16 = 0; + for ii in 0..16 { + int |= (bits[ii] as u16) << (16 - 1 - ii); + } + int +} + + +pub fn pack_int2(buf: &mut [u8], int: i16) { + BigEndian::write_i16(&mut buf, int) +} + +pub fn pack_int4(buf: &mut [u8], int: i32) { + BigEndian::write_i32(&mut buf, int) +} + +pub fn pack_real8(buf: &mut [u8], fnum: f64) { + BigEndian::write_u64(&mut buf, encode_real8(fnum)) +} + +pub fn pack_ascii(buf: &mut [u8], data: &[u8]) -> usize { + let len = data.len(); + buf[..len].copy_from_slice(data); + if len % 2 == 1 { + buf[len] = '\0'; + len + 1 + } else { + len + } +} + + +pub 
fn pack_datetime(buf: &mut [u8], date: &[u16; 6]) { + assert!(buf.len() >= 6 * 2); + let year = date[0] - 1900; + pack_int2(&mut buf, year); + for ii in 1..6 { + pack_int2(&mut buf[(2 * ii)..], date[ii]); + } +} + + +pub fn encode_real8(fnum: f64) -> u64 { + // Convert from float64 to GDS REAL8 representation. + + // Split the ieee float bitfields + let ieee = fnum.to_bits(); + let sign = ieee & 0x8000_0000_0000_0000; + let ieee_exp = (ieee >> 52) as i32 & 0x7ff; + let ieee_mant = ieee & 0xf_ffff_ffff_ffff; + + let subnorm = (ieee_exp == 0) & (ieee_mant != 0); + if (ieee_exp == 0) & (ieee_mant == 0) { + return 0 + } + + // IEEE normal double is (1 + ieee_mant / 2^52) * 2^(ieee_exp - 1023) + // IEEE subnormal double is (ieee_mant / 2^52) * 2^(-1022) + // GDS real8 is (gds_mant / 2^(7*8)) * 16^(gds_exp - 64) + // = (gds_mant / 2^56) * 2^(4 * gds_exp - 256) + + // Convert exponent. + let exp2 = if subnorm { -1022 } else {ieee_exp + 1 - 1023}; // +1 is due to mantissa (1.xxxx in IEEE vs 0.xxxxx in GDSII) + let mut exp16 = exp2 / 4; + let rest = exp2 % 4; + + // Compensate for exponent coarseness + let comp = rest != 0; + let mut shift; + if comp { + exp16 += 1; + shift = 4 - rest; + } else { + shift = rest; + } + shift -= 3; // account for gds bit position + + // add leading one + let mut gds_mant_unshifted = ieee_mant; + if !subnorm { + gds_mant_unshifted += 0x10_0000_0000_0000; + } + + let mut gds_mant = if shift > 0 { + gds_mant_unshifted >> shift + } else { + gds_mant_unshifted << -shift + }; + + // add gds exponent bias + let mut gds_exp = exp16 + 64; + + if gds_exp < -14 { + // number is too small + return 0 + } + + let neg_biased = gds_exp < 0; + if neg_biased { + gds_mant >>= gds_exp * 4; + gds_exp = 0; + } + + let too_big = (gds_exp > 0x7f) & !subnorm; + if too_big { + panic!("Number too big for real8 format"); //TODO error handling + } + + let gds_exp_bits = gds_exp << 56; + + let real8 = sign | gds_exp_bits | gds_mant; + real8 +} diff --git a/src/elements.rs 
b/src/elements.rs new file mode 100644 index 0000000..e972b77 --- /dev/null +++ b/src/elements.rs @@ -0,0 +1,481 @@ +/* + * Functionality for reading/writing elements (geometry, text labels, + * structure references) and associated properties. + */ + +//from .record import Record +// +use records::{BOX, BOUNDARY, NODE, PATH, TEXT, SREF, AREF, + DATATYPE, PATHTYPE, BOXTYPE, NODETYPE, TEXTTYPE, + LAYER, XY, WIDTH, COLROW, PRESENTATION, STRING, + STRANS, MAG, ANGLE, PROPATTR, PROPVALUE, + ENDEL, BGNEXTN, ENDEXTN, SNAME, + }; + +use records; +use record::RecordHeader; +use basic::{OWResult}; + +use std::collections::HashMap; +use std::io::Write; +use nom::IResult; + + +pub fn read_properties(input: &[u8]) -> IResult<&[u8], HashMap::>> { + /* + * Read element properties. + * + * Assumes PROPATTR records have unique values. + * Stops reading after consuming ENDEL record. + * + * Args: + * stream: Stream to read from. + * + * Returns: + * propattr: -> propvalue mapping + */ + let properties = HashMap{}; + + let (input, header) = RecordHeader::parse(input)?; + while header.tag != ENDEL::tag() { + if header.tag == PROPATTR::tag() { + let (input, key) = PROPATTR::read_data(input, header.data_size)?; + let (input, value) = PROPVALUE::read(input)?; + assert!(!properties.contains_key(key), format!{"Duplicate property key: {}", key}); + properties.insert(key, value); + } + let (input, header) = RecordHeader::parse(input)?; + } + Ok((input, properties)) +} + + +fn write_properties(ww: W, properties: &HashMap::>) -> OWResult { + /* + * Write element properties. + * + * This does _not_ write the ENDEL record. + * + * Args: + * stream: Stream to write to. + */ + let mut size = 0; + for (key, value) in &properties { + size += PROPATTR::write(ww, key)?; + size += PROPVALUE::write(ww, value)?; + } + Ok(size) +} + +trait Element { + fn parse(input: &[u8]) -> Self; + /* + * Read from a stream to construct this object. + * Consumes up to (and including) the ENDEL record. 
+ */ + + fn write(&self, ww: W) -> OWResult; + /* + * Write this element to a stream. + * Finishes with an ENDEL record. + */ +} + +struct Reference { + /* + * Datastructure representing + * an instance of a structure (SREF / structure reference) or + * an array of instances (AREF / array reference). + * Type is determined by the presence of the `colrow` tuple. + * + * Transforms are applied to each individual instance (_not_ + * to the instance's origin location or array vectors). + */ + struct_name: Vec, // Name of the structure being referenced. + invert_y: bool, // Whether to mirror the pattern (negate y-values / flip across x-axis). Default false. + mag: f64, // Scaling factor (default 1) + angle_deg: f64, // Rotation (degrees counterclockwise) + xy: Vec, + /* + * (For SREF) Location in the parent structure corresponding to the instance's origin (0, 0). + * (For AREF) 3 locations: + * [`offset`, + * `offset + col_basis_vector * colrow[0]`, + * `offset + row_basis_vector * colrow[1]`] + * which define the first instance's offset and the array's basis vectors. + * Note that many GDS implementations only support manhattan basis vectors, and some + * assume a certain axis mapping (e.g. x->columns, y->rows) and "reinterpret" the + * basis vectors to match it. + */ + + colrow: Option<(i32, i32)>, // Number of columns and rows (AREF) or None (SREF) + properties: HashMap::>, // Properties associated with this reference. 
+} + +impl Element for Reference { + fn parse(input: &[u8]) -> IResult<&[u8], Self> { + let mut invert_y = false; + let mut mag = 1; + let mut angle_deg = 0; + let mut colrow = None; + let (input, mut struct_name) = SNAME::skip_and_read(input)?; + + let (input, mut header) = RecordHeader::parse(input)?; + while header.tag != records::RTAG_XY { + match header.tag { + records::RTAG_STRANS => + {let (input, invert_y) = STRANS::read_data(input, header.data_size)?[0];}, + records::RTAG_MAG => + {let (input, mag) = MAG::read_data(input, header.data_size)?;}, + records::RTAG_ANGLE => + {let (input, angle_deg) = ANGLE::read_data(input, header.data_size)?;}, + records::RTAG_COLROW => + {let (input, colrow) = COLROW::read_data(input, header.data_size)?;}, + _ => + return Err(format!("Unexpected tag {:04x}", header.tag)), + }; + let (input, header) = RecordHeader::parse(input)?; + } + let (input, xy) = XY::read_data(input, header.data_size)?; + let (input, properties) = read_properties(input)?; + Reference{ + struct_name: struct_name, + xy: xy, + properties: properties, + colrow: colrow, + invert_y: invert_y, + mag: mag, + angle_deg: angle_deg + } + } + + fn write(&self, ww: W) -> OWResult { + let mut size = 0; + size += match self.colrow { + None => SREF::write(ww)?, + Some(_) => AREF::write(ww)?, + }; + + size += SNAME::write(ww, self.struct_name)?; + if self.angle_deg != 0 || self.mag != 1 || self.invert_y { + size += STRANS::write(ww, (self.invert_y as u16) << 15)?; + if self.mag != 1 { + size += MAG::write(ww, self.mag)?; + } + if self.angle_deg !=0 { + size += ANGLE::write(ww, self.angle_deg)?; + } + } + + if self.colrow.is_some() { + size += COLROW::write(ww, self.colrow)?; + } + + size += XY::write(ww, self.xy)?; + size += write_properties(ww, self.properties)?; + size += ENDEL::write(ww, None)?; + Ok(size) + } +} + +impl Reference { + pub fn check(&self) { + if self.colrow.is_some() { + assert!(self.xy.len() != 6, format!("colrow is Some, so expected size-6 xy. 
Got {}", self.xy)); + } else { + assert!(self.xy.len() != 2, format!("Expected size-2 xy. Got {}", self.xy)); + } + } +} + + +struct Boundary { + /* + * Datastructure representing a Boundary element. + */ + layer: (i16, i16), // (layer, data_type) tuple + xy: Vec, // Ordered vertices of the shape. First and last points should be identical. Order x0, y0, x1,... + properties: HashMap::>, // Properties for the element. +} + +impl Element for Boundary { + fn parse(input: &[u8]) -> IResult<&[u8], Self> { + let (input, layer) = LAYER::skip_and_read(input)?; + let (input, dtype) = DATATYPE::read(input)?; + let (input, xy) = XY::read(input)?; + let (input, properties) = read_properties(input)?; + Boundary{ + layer: (layer, dtype), + xy: xy, + properties: properties, + } + } + + fn write(&self, ww: W) -> OWResult { + let mut size = 0; + size += BOUNDARY::write(ww)?; + size += LAYER::write(ww, self.layer.0)?; + size += DATATYPE::write(ww, self.layer.1)?; + size += XY::write(ww, self.xy)?; + size += write_properties(ww, self.properties)?; + size += ENDEL::write(ww)?; + Ok(size) + } +} + + +struct Path { + /* + * Datastructure representing a Path element. + + * If `path_type < 4`, `extension` values are not written. + * During read, `extension` defaults to (0, 0) even if unused. + */ + layer: (i16, i16), // (layer, data_type) tuple + path_type: i16, // End-cap type (0: flush, 1: circle, 2: square, 4: custom) + width: i16, // Path width + extension: (i32, i32), // Extension when using path_type=4. Ignored otherwise. + xy: Vec, // Path centerline coordinates. [x0, y0, x1, y1,...] + properties: HashMap::>, //Properties for the element. 
+} + +impl Element for Path { + fn parse(input: &[u8]) -> IResult<&[u8], Self> { + let mut path_type = 0; + let mut width = 0; + let mut bgn_ext = 0; + let mut end_ext = 0; + let (input, mut layer) = LAYER::skip_and_read(input)?; + let (input, mut dtype) = DATATYPE::read(input)?; + + let (input, mut header) = RecordHeader::parse(&input)?; + while header.tag != records::RTAG_XY { + match header.tag { + records::RTAG_PATHTYPE => + {let (input, path_type) = PATHTYPE::read_data(input, header.data_size)?;}, + records::RTAG_WIDTH => + {let (input, width) = WIDTH::read_data(input, header.data_size)?;}, + records::RTAG_BGNEXTN => + {let (input, bgn_ext) = BGNEXTN::read_data(input, header.data_size)?;}, + records::RTAG_ENDEXTN => + {let (input, end_ext) = ENDEXTN::read_data(input, header.data_size)?;}, + _ => + return Err(format!("Unexpected tag {:04x}", header.tag)), + }; + let (input, header) = RecordHeader::parse(&input)?; + } + let (input, xy) = XY::read_data(input, header.data_size)?; + let (input, properties) = read_properties(input)?; + Path{ + layer: (layer, dtype), + xy: xy, + properties: properties, + extension: (bgn_ext, end_ext), + path_type: path_type, + width: width, + } + } + + fn write(&self, ww: W) -> OWResult { + let mut size = 0; + size += PATH::write(ww)?; + size += LAYER::write(ww, self.layer[0])?; + size += DATATYPE::write(ww, self.layer[1])?; + if self.path_type != 0 { + size += PATHTYPE::write(ww, self.path_type)?; + } + if self.width != 0 { + size += WIDTH::write(ww, self.width)?; + } + + if self.path_type < 4 { + let (bgn_ext, end_ext) = self.extension; + if bgn_ext != 0 { + size += BGNEXTN::write(ww, bgn_ext)?; + } + if end_ext != 0 { + size += ENDEXTN::write(ww, end_ext)?; + } + } + size += XY::write(ww, self.xy)?; + size += write_properties(ww, self.properties)?; + size += ENDEL::write(ww)?; + Ok(size) + } +} + + +struct GDSBox { + /* + * Datastructure representing a Box element. Rarely used. 
+ */ + layer: (i16, i16), // (layer, box_type) tuple + xy: Vec, // Box coordinates (5 pairs) + properties: HashMap::>, // Properties for the element. +} + +impl Element for GDSBox { + fn parse(input: &[u8]) -> IResult<&[u8], Self> { + let (input, layer) = LAYER::skip_and_read(input)?; + let (input, dtype) = BOXTYPE::read(input)?; + let (input, xy) = XY::read(input)?; + let (input, properties) = read_properties(input)?; + GDSBox{ + layer: (layer, dtype), + xy: xy, + properties: properties, + } + } + + fn write(&self, ww: W) -> OWResult { + let mut size = 0; + size += BOX::write(ww)?; + size += LAYER::write(ww, self.layer[0])?; + size += BOXTYPE::write(ww, self.layer[1])?; + size += XY::write(ww, self.xy)?; + size += write_properties(ww, self.properties)?; + size += ENDEL::write(ww)?; + Ok(size) + } +} + + +struct Node { + /* + * Datastructure representing a Node element. Rarely used. + */ + layer: (i16, i16), // (layer, node_type) tuple + xy: Vec, // 1-50 pairs of coordinates. + properties: HashMap::>, // Properties for the element. +} + +impl Element for Node { + fn parse(input: &[u8]) -> IResult<&[u8], Self> { + let (input, layer) = LAYER::skip_and_read(input)?; + let (input, dtype) = NODETYPE::read(input)?; + let (input, xy) = XY::read(input)?; + let (input, properties) = read_properties(input)?; + Node{ + layer: (layer, dtype), + xy: xy, + properties: properties, + } + } + + fn write(&self, ww: W) -> OWResult { + let mut size = 0; + size += NODE::write(ww)?; + size += LAYER::write(ww, self.layer[0])?; + size += NODETYPE::write(ww, self.layer[1])?; + size += XY::write(ww, self.xy)?; + size += write_properties(ww, self.properties)?; + size += ENDEL::write(ww)?; + Ok(size) + } +} + +struct Text { + /* + * Datastructure representing a text label. + */ + layer: (i16, i16), // (layer, text_type) tuple + presentation: [bool; 16], + /* + * Bit array. Default all zeros. 
+ * bits 0-1: 00 left/01 center/10 right + * bits 2-3: 00 top/01 middle/10 bottom + * bits 4-5: font number + */ + path_type: i16, // Default 0 + width: i32, // Default 0 + invert_y: bool, // Vertical inversion. Default false. + mag: f64, // Scaling factor. Default 1. + angle_deg: f64, // Rotation (ccw). Default 0. + xy: Vec, // Position (1 pair only) + string: Vec, // Text content + properties: HashMap::> // Properties for the element. +} + +impl Element for Text { + fn parse(input: &[u8]) -> IResult<&[u8], Self> { + let mut path_type = 0; + let mut presentation = 0; + let mut invert_y = false; + let mut width = 0; + let mut mag = 1; + let mut angle_deg = 0; + let (input, layer) = LAYER::skip_and_read(input)?; + let (input, dtype) = TEXTTYPE::read(input)?; + + let mut header = RecordHeader::parse(input)?; + while header.tag != records::RTAG_XY { + match header.tag { + records::RTAG_PRESENTATION => + {let (input, presentation) = PRESENTATION::read_data(input, header.data_size)?;}, + records::RTAG_PATHTYPE => + {let (input, path_type) = PATHTYPE::read_data(input, header.data_size)?;}, + records::RTAG_WIDTH => + {let (input, width) = WIDTH::read_data(input, header.data_size)?;}, + records::RTAG_STRANS => { + let (input, strans) = STRANS::read_data(input, header.data_size)?; + invert_y = strans[0]; + }, + records::RTAG_MAG => + {let (input, mag) = MAG::read_data(input, header.data_size)?;}, + records::RTAG_ANGLE => + {let (input, angle_deg) = ANGLE::read_data(input, header.data_size)?;}, + _ => + return Err(format!("Unexpected tag {:04x}", header.tag)), + } + let (input, header) = RecordHeader::parse(input)?; + } + let (input, xy) = XY::read_data(input, header.data_size)?; + + let (input, string) = STRING::read(input)?; + let (input, properties) = read_properties(input)?; + Text{ + layer: (layer, dtype), + xy: xy, + properties: properties, + string: string, + presentation: presentation, + path_type: path_type, + width: width, + invert_y: invert_y, + mag: mag, + 
angle_deg: angle_deg, + } + } + + fn write(&self, ww: W) -> OWResult { + let mut size = 0; + size += TEXT::write(ww)?; + size += LAYER::write(ww, self.layer[0])?; + size += TEXTTYPE::write(ww, self.layer[1])?; + if self.presentation != 0 { + size += PRESENTATION::write(ww, self.presentation)?; + } + if self.path_type != 0 { + size += PATHTYPE::write(ww, self.path_type)?; + } + if self.width != 0 { + size += WIDTH::write(ww, self.width)? + } + if self.angle_deg != 0 || self.mag != 1 || self.invert_y { + let strans = [false; 16]; + strans[0] = self.invert_y; + size += STRANS::write(ww, strans)?; + if self.mag != 1 { + size += MAG::write(ww, self.mag)?; + } + if self.angle_deg !=0 { + size += ANGLE::write(ww, self.angle_deg)?; + } + } + size += XY::write(ww, self.xy)?; + size += STRING::write(ww, self.string)?; + size += write_properties(ww, self.properties)?; + size += ENDEL::write(ww)?; + Ok(size) + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..d1c5fd4 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,7 @@ +extern crate nom; +extern crate byteorder; + +pub mod basic; +pub mod record; +pub mod records; +pub mod elements; diff --git a/src/library.rs b/src/library.rs new file mode 100644 index 0000000..7a8573a --- /dev/null +++ b/src/library.rs @@ -0,0 +1,282 @@ +/* + *File-level read/write functionality. + */ + +use nom; +use std::io::Write; +use std::collections:HashMap; + +use record; +use records; +use elements; +//from .records import HEADER, BGNLIB, ENDLIB, UNITS, LIBNAME +//from .records import BGNSTR, STRNAME, ENDSTR, SNAME, COLROW, ENDEL +//from .records import BOX, BOUNDARY, NODE, PATH, TEXT, SREF, AREF +//from .elements import Element, Reference, Text, Box, Boundary, Path, Node + + +pub struct FileHeader { + /* + * Representation of the GDS file header. + * + * File header records: HEADER BGNLIB LIBNAME UNITS + * Optional records are ignored if present and never written. + * + * Version is written as `600`. 
+ */ + name: Vec, // Library name + user_units_per_db_unit: f64, // Number of user units in one database unit + meters_per_db_unit: f64, // Number of meters in one database unit + mod_time: [u16; 6], // Last-modified time [y, m, d, h, m, s] + acc_time: [u16; 6], // Last-accessed time [y, m, d, h, m, s] +} + +impl FileHeader { + pub fn new(name: &[u8], meters_per_db_unit: f64, user_units_per_db_unit: f64) -> Self { + FileHeader{ + mod_time: [0, 1, 1, 0, 0, 0]; + acc_time: [0, 1, 1, 0, 0, 0]; + name: name.to_owned(), + user_units_per_db_unit: user_units_per_db_unit, + meters_per_db_unit: meters_per_db_unit, + } + } + + pub fn read(input: &[u8]) -> IResult<&[u8], Self> { + /* + * Read and construct a header from the provided stream. + * + * Args: + * stream: Seekable stream to read from + * + * Returns: + * FileHeader object + */ + let (input, version) = records::HEADER.read(input)?; + let (input, (mod_time, acc_time)) = records::BGNLIB.read(input)?; + let (input, name) = records::LIBNAME.skip_and_read(input)?; + let (input, (uu, dbu)) = records::UNITS.skip_and_read(input)?; + + FileHeader{ + mod_time: mod_time, + acc_time: acc_time, + name: name, + user_units_per_db_unit: uu, + meters_per_db_unit: dbu, + } + } + + pub fn write(&self, ww: W) -> OWResult { + /* + * Write the header to a stream + * + * Args: + * stream: Stream to write to + * + * Returns: + * number of bytes written + */ + let mut size = 0; + size += records::HEADER.write(stream, 600) + size += records::BGNLIB.write(stream, [self.mod_time, self.acc_time]) + size += records::LIBNAME.write(stream, self.name) + size += records::UNITS.write(stream, (self.user_units_per_db_unit, self.meters_per_db_unit)) + Ok(size) + } +} + + +pub fn scan_structs(input: &[u8]) -> HashMap::, usize> { + /* + * Scan through a GDS file, building a table of + * {b'structure_name': byte_offset}. + * The intent of this function is to enable random access + * and/or partial (structure-by-structure) reads. 
+ * + * Args: + * stream: Seekable stream to read from. Should be positioned + * before the first structure record, but possibly + * already past the file header. + */ + let input_size = input.len(); + let positions = HashMap{}; + + let (input, header) = RecordHeader.parse(input)?; + while header.tag != records::RTAG_ENDLIB { + let (input, _) = nom::bytes::streaming::take(size)(input)?; + if tag == records::RTAG_BGNSTR { + let (input, name) = records::STRNAME.read(input)?; + if positions.conains_key(name) { + return Err(format!("Duplicate structure name: {}", name)); + } + let position = input_size - input.len(); + positions.insert(name, position); + } + let (input, header) = RecordHeader.parse(input)?; + } + positions +} + + +pub struct Cell { + +} + +impl Cell { + pub fn write( + &self, + ww: Write, + name: &[u8], + cre_time: Option<[i16; 6]>, + mod_time: Option<[i16; 6]>, + ) -> OWResult { + /* + * Write a structure to the provided stream. + * + * Args: + * name: Structure name (ascii-encoded). + * elements: List of Elements containing the geometry and text in this struct. + * cre_time: Creation time (optional). + * mod_time: Modification time (optional). + * + * Return: + * Number of bytes written + */ + let mut size = 0; + size += BGNSTR.write(ww, (cre_time, mod_time)) + size += STRNAME.write(ww, name) + size += cell.write(ww) + size += ENDSTR.write(ww) + Ok(size) + } +} + +pub fn try_read_struct(input: &[u8]) -> IResult<&[u8], Option<(Vec, Cell>)> { + /* + * Skip to the next structure and attempt to read it. + * + * Args: + * stream: Seekable stream to read from. + * + * Returns: + * (name, elements) if a structure was found. + * None if no structure was found before the end of the library. 
+ */ + let (input, success) = records::BGNSTR.skip_past(input)?; + if !success { + return None + } + + let (input, name) = records::STRNAME.read(input)?; + let (input, elements) = Cell::read_elements(input)?; + Some((name, elements)) +} + + + +pub fn read_elements(stream: BinaryIO) -> List[Element] { + /* + * Read elements from the stream until an ENDSTR + * record is encountered. The ENDSTR record is also + * consumed. + * + * Args: + * stream: Seekable stream to read from. + * + * Returns: + * List of element objects. + */ + let (input, header) = RecordHeader.parse(input)?; + while header.tag != records::RTAG_ENDSTR { + match header.tag { + records::RTAG_BOUNDARY => { + let (input, boundary) = records::BOUNDARY.read(input)?; + cell.boundaries.insert(boundary); + }, + records::RTAG_PATH => { + let (input, path) = records::PATH.read(input)?; + cell.paths.insert(path); + }, + records::RTAG_NODE => { + let (input, node) = records::NODE.read(input)?; + cell.nodes.insert(node); + }, + records::RTAG_BOX => { + let (input, gds_box) = records::BOX.read(input)?; + cell.boxes.insert(gds_box); + }, + records::RTAG_TEXT => { + let (input, txt) = records::TEXT.read(input)?; + cell.texts.insert(txt); + }, + records::RTAG_SREF => { + let (input, sref) = records::SREF.read(input)?; + cell.refs.insert(sref); + }, + records::RTAG_AREF => { + let (input, aref) = records::AREF.read(input)?; + cell.refs.insert(aref); + }, + _ => { + // don't care, skip + let (input, _) = nom::bytes::streaming::take(size)(input)?; + } + } + let (input, header) = RecordHeader.parse(input)?; + } + Ok((input, data)) +} + + +pub fn scan_hierarchy(input: &[u8]) -> IResult<&[u8], HashMap::, HashMap::, u32>>> { + /* + * Scan through a GDS file, building a table of instance counts + * `{b'structure_name': {b'ref_name': count}}`. + * + * This is intended to provide a fast overview of the file's + * contents without performing a full read of all elements. + * + * Args: + * stream: Seekable stream to read from. 
Should be positioned + * before the first structure record, but possibly + * already past the file header. + */ + let structures = HashMap{}; + + let mut ref_name = None + let mut ref_count = None + let mut cur_structure = HashMap{}; + + let (input, header) = Record.read_header(stream) + while header.tag != records::RTAG_ENDLIB { + match header.tag { + records::RTAG_BGNSTR => { + let (input, _) = nom::bytes::streaming::take(size)(input)?; + let (input, name) = records::STRNAME.read(input)?; + if structures.contains_key(name) { + return Err(format!("Duplicate structure name: {}", name)); + } + cur_structure = HashMap{}; + structures.insert(name, cur_structure); + ref_name = None; + ref_count = None; + }, + records::RTAG_SNAME => { + let (input, sname) = SNAME.read_data(input, header.data_size)?; + ref_name = Some(sname); + }, + records::RTAG_COLROW => { + let (input, colrow) = COLROW.read_data(input, header.data_size); + ref_count = colrow[0] * colrow[1]; + }, + records::RTAG_ENDEL => { + *cur_structure.entry(ref_name.unwrap()).or_insert(0) += ref_count.unwrap_or(1); + }, + _ => { + let (input, _) = nom::bytes::streaming::take(size)(input)?; + }, + } + let (input, header) = RecordHeader.parse(input)?; + } + structures +} diff --git a/src/record.rs b/src/record.rs new file mode 100644 index 0000000..5f6775b --- /dev/null +++ b/src/record.rs @@ -0,0 +1,219 @@ +/* + * Generic record-level read/write functionality. 
+ */ +use nom::IResult; +use std::io::Write; +use byteorder::BigEndian; + + +use basic::{pack_datetime, pack_bitarray, pack_ascii, pack_int2, pack_int4, pack_real8}; #[warn(unused_imports)] +use basic::{parse_datetime, parse_bitarray, parse_ascii, parse_int2, parse_int4, parse_real8}; #[warn(unused_imports)] +use basic::{OWResult}; +use records; +//from .basic import parse_int2, parse_int4, parse_real8, parse_datetime, parse_bitarray +//from .basic import pack_int2, pack_int4, pack_real8, pack_datetime, pack_bitarray +//from .basic import parse_ascii, read + + +//#[no_mangle] +//pub extern "C" fn write_record_header( + + +#[repr(C)] +pub struct RecordHeader { + pub tag: u16, + pub data_size: u16, +} + +impl RecordHeader { + pub fn parse(input: &[u8]) -> IResult<&[u8], RecordHeader> { + let (_, size) = nom::number::streaming::be_u16(input[0..])?; + let (_, tag) = nom::number::streaming::be_u16(input[2..])?; + Ok((input[4..], RecordHeader{tag:tag, data_size:size - 4})) + } + + pub fn pack(self) -> [u8; 4] { + assert!(self.size < 0xffff - 4, "Record too big!"); + let vals = [self.size, self.tag]; + let mut buf = [0x77; 4]; + BigEndian::write_u16_into(&vals, &mut buf); + buf + } + + pub fn write(&self, ww: W) -> OWResult { + let bytes = self.pack(); + ww.write(bytes) + } +} + + +pub trait Record { + fn tag() -> u32; + fn expected_size() -> Option; + + //fn parse_data(input: &[u8], size: usize) -> IResult<&[u8], Self>; +} + +impl Record { + pub fn check_size(&self, actual_size: usize) -> Result<(), &str> { + match self.expected_size() { + Some(size) => if size == actual_size { + Ok(()) + } else { + Err(format!("Expected record size {}, got {}", size, actual_size)) + }, + None => Ok(()), + } + } + + pub fn parse_header(input: &[u8]) -> IResult<&[u8], RecordHeader> { + RecordHeader::parse(input) + } + + pub fn write_header(ww: W, data_size: usize) -> OWResult { + RecordHeader{tag: Self.tag(), size: data_size}.write(ww) + } + + pub fn skip_past(input: &[u8]) -> 
IResult<&[u8], bool> { + /* + * Skip to the end of the next occurrence of this record. + * + * Return: + * True if the record was encountered and skipped. + * False if the end of the library was reached. + */ + let (input, header) = RecordHeader::parse(input)?; + while header.tag != Self.tag() { + let (input, _) = nom::bytes::streaming::take(header.size)?; + if header.tag == records::RTAG_ENDLIB { + return Ok((input, false)) + } + let (input, header) = RecordHeader::parse(input)?; + } + let (input, _) = nom::bytes::streaming::take(header.size)?; + Ok((input, true)) + } + + /* + pub fn skip_and_read(input: &[u8]) -> IResult<&[u8], bool>{ + size, tag = Record.read_header(stream) + while tag != cls.tag{ + stream.seek(size, io.SEEK_CUR) + size, tag = Record.read_header(stream) + } + data = cls.read_data(stream, size) + return data + } + + def read(cls: Type[R], stream: BinaryIO){ + size = expect_record(stream, cls.tag) + data = cls.read_data(stream, size) + return data + } + + def write(cls, stream: BinaryIO, data) -> int { + data_bytes = cls.pack_data(data) + b = cls.write_header(stream, len(data_bytes)) + b += stream.write(data_bytes) + return b + } + */ +} + + +pub trait BitArray { + fn parse_data(input: &[u8]) -> IResult<&[u8], [bool; 16]> { + parse_bitarray(input) + } + + fn pack_data(buf: &mut [u8], vals: &[bool; 16]) { + pack_bitarray(&mut buf, vals) + } +} + +pub trait Int2 { + fn parse_data(input: &[u8]) -> IResult<&[u8], i16> { + parse_int2(input) + } + + fn pack_data(buf: &mut [u8], val: i16) { + pack_int2(&mut buf, val) + } +} + +pub trait Int4 { + fn parse_data(input: &[u8]) -> IResult<&[u8], i32> { + parse_int4(input) + } + + fn pack_data(buf: &mut [u8], val: i32) { + pack_int4(&mut buf, val) + } +} + +pub trait Int2Array { + fn parse_data(input: &[u8], size: usize) -> IResult<&[u8], Vec> { + assert!(size % 2 == 0, "Record must contain an integer quantity of integers"); + nom::multi::count(parse_int2, size / 2)(input) + } + + fn pack_data(buf: &mut [u8], 
vals: &[i16]) { + BigEndian::write_i16_into(&vals, &mut buf) + } +} + + +pub trait Int4Array { + fn parse_data(input: &[u8], size: usize) -> IResult<&[u8], Vec> { + assert!(size % 4 == 0, "Record must contain an integer quantity of integers"); + nom::multi::count(parse_int4, size / 4)(input) + } + + fn pack_data(buf: &mut [u8], vals: &[i32]) { + BigEndian::write_i32_into(&vals, &mut buf) + } +} + +pub trait Real8 { + fn parse_data(input: &[u8]) -> IResult<&[u8], f64> { + parse_real8(input) + } + + fn pack_data(buf: &mut [u8], val: f64) { + pack_real8(&mut buf, val) + } +} + +pub trait ASCII { + fn parse_data(input: &[u8]) -> IResult<&[u8], Vec> { + parse_ascii(input) + } + + fn pack_data(buf: &mut [u8], data: &[u8]) { + pack_ascii(&mut buf, data) + } +} + +pub trait DateTime { + fn parse_data(input: &[u8]) -> IResult<&[u8], [u16; 6]> { + parse_datetime(input) + } + + fn pack_data(buf: &mut [u8], data: [u16; 6]) { + pack_datetime(&mut buf, data) + } +} + +impl DTR { + fn skip_and_read(input: &[u8]) -> IResult<&[u8], [DTR; 2]> { + let mut header = Self.read_header(input)?; + while header.tag != Self.tag() { + nom::bytes::streaming::take(header.data_size)?; + header = Self.read_header(input)?; + } + assert!(header.data_size == 6 * 2); + let data0 = Self.read_data(&input)?; + let data1 = Self.read_data(&input)?; + Ok([data0, data1]) + } +} diff --git a/src/records.rs b/src/records.rs new file mode 100644 index 0000000..92c5ad4 --- /dev/null +++ b/src/records.rs @@ -0,0 +1,595 @@ +/* + * Record type and tag definitions + */ + +use record::{Record, Int2, Int4, Int2Array, Int4Array, Real8, DateTime, BitArray, ASCII}; +//use basic::{OWResult}; + +//use std::io::Write; + +// record tags +pub const RTAG_HEADER: u16 = 0x0002; +pub const RTAG_BGNLIB: u16 = 0x0102; +pub const RTAG_LIBNAME: u16 = 0x0206; +pub const RTAG_UNITS: u16 = 0x0305; // (user_units_per_db_unit, db_units_per_meter) +pub const RTAG_ENDLIB: u16 = 0x0400; +pub const RTAG_BGNSTR: u16 = 0x0502; +pub const 
RTAG_STRNAME: u16 = 0x0606; +pub const RTAG_ENDSTR: u16 = 0x0700; +pub const RTAG_BOUNDARY: u16 = 0x0800; +pub const RTAG_PATH: u16 = 0x0900; +pub const RTAG_SREF: u16 = 0x0a00; +pub const RTAG_AREF: u16 = 0x0b00; +pub const RTAG_TEXT: u16 = 0x0c00; +pub const RTAG_LAYER: u16 = 0x0d02; +pub const RTAG_DATATYPE: u16 = 0x0e02; +pub const RTAG_WIDTH: u16 = 0x0f03; +pub const RTAG_XY: u16 = 0x1003; +pub const RTAG_ENDEL: u16 = 0x1100; +pub const RTAG_SNAME: u16 = 0x1206; +pub const RTAG_COLROW: u16 = 0x1302; +pub const RTAG_NODE: u16 = 0x1500; +pub const RTAG_TEXTTYPE: u16 = 0x1602; +pub const RTAG_PRESENTATION: u16 = 0x1701; +pub const RTAG_SPACING: u16 = 0x1802; // unused; not sure about 02 +pub const RTAG_STRING: u16 = 0x1906; +pub const RTAG_STRANS: u16 = 0x1a01; +pub const RTAG_MAG: u16 = 0x1b05; +pub const RTAG_ANGLE: u16 = 0x1c05; +pub const RTAG_UINTEGER: u16 = 0x1d02; // unused; not sure about 02 +pub const RTAG_USTRING: u16 = 0x1e06; // unused; not sure about 06 +pub const RTAG_REFLIBS: u16 = 0x1f06; +pub const RTAG_FONTS: u16 = 0x2006; +pub const RTAG_PATHTYPE: u16 = 0x2102; +pub const RTAG_GENERATIONS: u16 = 0x2202; +pub const RTAG_ATTRTABLE: u16 = 0x2306; +pub const RTAG_STYPTABLE: u16 = 0x2406; // unused; not sure about 06 +pub const RTAG_STRTYPE: u16 = 0x2502; // unused +pub const RTAG_ELFLAGS: u16 = 0x2601; +pub const RTAG_ELKEY: u16 = 0x2703; // unused +pub const RTAG_LINKTYPE: u16 = 0x2803; // unused +pub const RTAG_LINKKEYS: u16 = 0x2903; // unused +pub const RTAG_NODETYPE: u16 = 0x2a02; +pub const RTAG_PROPATTR: u16 = 0x2b02; +pub const RTAG_PROPVALUE: u16 = 0x2c06; +pub const RTAG_BOX: u16 = 0x2d00; +pub const RTAG_BOXTYPE: u16 = 0x2e02; +pub const RTAG_PLEX: u16 = 0x2f03; +pub const RTAG_BGNEXTN: u16 = 0x3003; +pub const RTAG_ENDEXTN: u16 = 0x3103; +pub const RTAG_TAPENUM: u16 = 0x3202; +pub const RTAG_TAPECODE: u16 = 0x3302; +pub const RTAG_STRCLASS: u16 = 0x3401; +pub const RTAG_RESERVED: u16 = 0x3503; +pub const RTAG_FORMAT: u16 = 0x3602; +pub 
const RTAG_MASK: u16 = 0x3706; // list of Layers and dtypes +pub const RTAG_ENDMASKS: u16 = 0x3800; // end of MASKS records +pub const RTAG_LIBDIRSIZE: u16 = 0x3902; +pub const RTAG_SRFNAME: u16 = 0x3a06; +pub const RTAG_LIBSECUR: u16 = 0x3b02; +pub const RTAG_BORDER: u16 = 0x3c00; +pub const RTAG_SOFTFENCE: u16 = 0x3d00; +pub const RTAG_HARDFENCE: u16 = 0x3f00; +pub const RTAG_SOFTWIRE: u16 = 0x3f00; +pub const RTAG_HARDWIRE: u16 = 0x4000; +pub const RTAG_PATHPORT: u16 = 0x4100; +pub const RTAG_NODEPORT: u16 = 0x4200; +pub const RTAG_USERCONSTRAINT: u16 = 0x4300; +pub const RTAG_SPACERERROR: u16 = 0x4400; +pub const RTAG_CONTACT: u16 = 0x4500; + +/* +// data types +pub const DATA_TYPE_NONE: u16 = 0x00; +pub const DATA_TYPE_BIT: u16 = 0x01; +pub const DATA_TYPE_INT16: u16 = 0x02; +pub const DATA_TYPE_INT32: u16 = 0x03; +pub const DATA_TYPE_REAL32: u16 = 0x04; +pub const DATA_TYPE_REAL64: u16 = 0x05; +pub const DATA_TYPE_STR: u16 = 0x06; + +pub const MAX_DATA_SIZE: usize = 8; + +/// Returns the size of the given data type in bytes. 
+pub fn data_size(t: u16) -> usize { + match t { + x if x == DATA_TYPE_NONE => 0, + x if x == DATA_TYPE_BIT => 2, + x if x == DATA_TYPE_INT16 => 2, + x if x == DATA_TYPE_INT32 => 4, + x if x == DATA_TYPE_REAL32 => 4, + x if x == DATA_TYPE_REAL64 => 8, + _ => 0 + } +*/ + +pub struct HEADER {} +impl Record for HEADER { + fn tag() -> u16 { RTAG_HEADER } + fn expected_size() -> usize { Some(2) } +} +impl Int2 for HEADER {} + +pub struct BGNLIB {} +impl Record for BGNLIB { + fn tag() -> u16 { RTAG_BGNLIB } + fn expected_size() -> usize { Some(2 * 6) } +} +impl DateTime for BGNLIB {} + +pub struct LIBNAME {} +impl Record for LIBNAME { + fn tag() -> u16 { RTAG_LIBNAME } + fn expected_size() -> usize { None } +} +impl ASCII for LIBNAME {} + +pub struct UNITS {} +impl Record for UNITS { + // (user_units_per_db_unit, db_units_per_meter) + fn tag() -> u16 { RTAG_UNITS } + fn expected_size() -> usize { Some(2 * 8) } +} +impl Real8 for UNITS {} + +pub struct ENDLIB {} +impl Record for ENDLIB { + fn tag() -> u16 { RTAG_ENDLIB } + fn expected_size() -> usize { Some(0) } +} + +pub struct BGNSTR {} +impl Record for BGNSTR { + fn tag() -> u16 { RTAG_BGNSTR } + fn expected_size() -> usize { Some(2 * 6) } +} +impl DateTime for ENDLIB {} + +pub struct STRNAME {} +impl Record for STRNAME { + fn tag() -> u16 { RTAG_STRNAME } + fn expected_size() -> usize { Some(2 * 6) } +} +impl ASCII for STRNAME {} + +pub struct ENDSTR {} +impl Record for ENDSTR { + fn tag() -> u16 { RTAG_ENDSTR } + fn expected_size() -> usize { Some(0) } +} + +pub struct BOUNDARY {} +impl Record for BOUNDARY { + fn tag() -> u16 { RTAG_BOUNDARY } + fn expected_size() -> usize { Some(0) } +} + +pub struct PATH {} +impl Record for PATH { + fn tag() -> u16 { RTAG_PATH } + fn expected_size() -> usize { Some(0) } +} + +pub struct SREF {} +impl Record for SREF { + fn tag() -> u16 { RTAG_SREF } + fn expected_size() -> usize { Some(0) } +} + +pub struct AREF {} +impl Record for AREF { + fn tag() -> u16 { RTAG_AREF } + fn 
expected_size() -> usize { Some(0) } +} + +pub struct TEXT {} +impl Record for TEXT { + fn tag() -> u16 { RTAG_TEXT } + fn expected_size() -> usize { Some(0) } +} + +pub struct LAYER {} +impl Record for LAYER { + fn tag() -> u16 { RTAG_LAYER } + fn expected_size() -> usize { Some(2) } +} +impl Int2 for LAYER {} + +pub struct DATATYPE {} +impl Record for DATATYPE { + fn tag() -> u16 { RTAG_DATATYPE } + fn expected_size() -> usize { Some(2) } +} +impl Int2 for DATATYPE {} + +pub struct WIDTH {} +impl Record for WIDTH { + fn tag() -> u16 { RTAG_WIDTH } + fn expected_size() -> usize { Some(4) } +} +impl Int4 for WIDTH {} + +pub struct XY {} +impl Record for XY { + fn tag() -> u16 { RTAG_XY } + fn expected_size() -> usize { None } +} +impl Int4Array for XY {} + +pub struct ENDEL {} +impl Record for ENDEL { + fn tag() -> u16 { RTAG_ENDEL } + fn expected_size() -> usize { Some(0) } +} + +pub struct SNAME {} +impl Record for SNAME { + fn tag() -> u16 { RTAG_SNAME } + fn expected_size() -> usize { None } +} +impl ASCII for SNAME {} + +pub struct COLROW {} +impl Record for COLROW { + fn tag() -> u16 { RTAG_COLROW } + fn expected_size() -> usize { Some(4) } +} +impl Int2Array for COLROW {} + +pub struct NODE {} +impl Record for NODE { + fn tag() -> u16 { RTAG_NODE } + fn expected_size() -> usize { Some(0) } +} + +pub struct TEXTTYPE {} +impl Record for TEXTTYPE { + fn tag() -> u16 { RTAG_TEXTTYPE } + fn expected_size() -> usize { Some(2) } +} +impl Int2 for TEXTTYPE {} + +pub struct PRESENTATION {} +impl Record for PRESENTATION { + fn tag() -> u16 { RTAG_PRESENTATION } + fn expected_size() -> usize { Some(2) } +} +impl BitArray for PRESENTATION {} + +pub struct SPACING {} +impl Record for SPACING { + fn tag() -> u16 { RTAG_SPACING } + fn expected_size() -> usize { Some(2) } +} +impl Int2 for SPACING {} + +pub struct STRING {} +impl Record for STRING { + fn tag() -> u16 { RTAG_STRING } + fn expected_size() -> usize { None } +} +impl ASCII for STRING {} + +pub struct STRANS {} 
+impl Record for STRANS { + fn tag() -> u16 { RTAG_STRANS } + fn expected_size() -> usize { Some(2) } +} +impl BitArray for STRANS {} + +pub struct MAG {} +impl Record for MAG { + fn tag() -> u16 { RTAG_MAG } + fn expected_size() -> usize { Some(8) } +} +impl Real8 for MAG {} + +pub struct ANGLE {} +impl Record for ANGLE { + fn tag() -> u16 { RTAG_ANGLE } + fn expected_size() -> usize { Some(8) } +} +impl Real8 for ANGLE {} + +pub struct UINTEGER {} +impl Record for UINTEGER { + fn tag() -> u16 { RTAG_UINTEGER } + fn expected_size() -> usize { Some(2) } +} +impl Int2 for UINTEGER {} + +pub struct USTRING {} +impl Record for USTRING { + fn tag() -> u16 { RTAG_USTRING } + fn expected_size() -> usize { None } +} +impl ASCII for USTRING {} + +pub struct REFLIBS {} +impl Record for REFLIBS { + fn tag() -> u16 { RTAG_REFLIBS } + fn expected_size() -> usize { None } +} +impl REFLIBS { + fn check_size(&self, actual_size: usize) -> Result<(), &str> { + if actual_size % 44 == 0 { + Ok(()) + } else { + Err(format!("Expected record size divisible by 44, got {}", actual_size)) + } + } +} +impl ASCII for REFLIBS {} + +pub struct FONTS {} +impl Record for FONTS { + fn tag() -> u16 { RTAG_FONTS } + fn expected_size() -> usize { None } +} +impl FONTS { + fn check_size(&self, actual_size: usize) -> Result<(), &str> { + if actual_size % 44 == 0 { + Ok(()) + } else { + Err(format!("Expected record size divisible by 44, got {}", actual_size)) + } + } +} +impl ASCII for FONTS {} + +pub struct PATHTYPE {} +impl Record for PATHTYPE { + fn tag() -> u16 { RTAG_PATHTYPE } + fn expected_size() -> usize { Some(2) } +} +impl Int2 for PATHTYPE {} + +pub struct GENERATIONS {} +impl Record for GENERATIONS { + fn tag() -> u16 { RTAG_GENERATIONS } + fn expected_size() -> usize { Some(2) } +} +impl Int2 for GENERATIONS {} + +pub struct ATTRTABLE {} +impl Record for ATTRTABLE { + fn tag() -> u16 { RTAG_ATTRTABLE } + fn expected_size() -> usize { None } +} +impl ATTRTABLE { + fn check_size(&self, 
actual_size: usize) -> Result<(), &str> { + if actual_size % 44 == 0 { + Ok(()) + } else { + Err(format!("Expected record size divisible by 44, got {}", actual_size)) + } + } +} +impl ASCII for ATTRTABLE {} + +pub struct STYPTABLE {} +impl Record for STYPTABLE { + fn tag() -> u16 { RTAG_STYPTABLE } + fn expected_size() -> usize { None } +} +impl ASCII for STYPTABLE {} + +pub struct STRTYPE {} +impl Record for STRTYPE { + fn tag() -> u16 { RTAG_STRTYPE } + fn expected_size() -> usize { None } +} +impl Int2 for STRTYPE {} + +pub struct ELFLAGS {} +impl Record for ELFLAGS { + fn tag() -> u16 { RTAG_ELFLAGS } + fn expected_size() -> usize { Some(2) } +} +impl BitArray for ELFLAGS {} + +pub struct ELKEY {} +impl Record for ELKEY { + fn tag() -> u16 { RTAG_ELKEY } + fn expected_size() -> usize { Some(2) } +} +impl Int2 for ELKEY {} + +pub struct LINKTYPE {} +impl Record for LINKTYPE { + fn tag() -> u16 { RTAG_LINKTYPE } + fn expected_size() -> usize { Some(2) } +} +impl Int2 for LINKTYPE {} + +pub struct LINKKEYS {} +impl Record for LINKKEYS { + fn tag() -> u16 { RTAG_LINKKEYS } + fn expected_size() -> usize { Some(2) } +} +impl Int2 for LINKKEYS {} + +pub struct NODETYPE {} +impl Record for NODETYPE { + fn tag() -> u16 { RTAG_NODETYPE } + fn expected_size() -> usize { Some(2) } +} +impl Int2 for NODETYPE {} + +pub struct PROPATTR {} +impl Record for PROPATTR { + fn tag() -> u16 { RTAG_PROPATTR } + fn expected_size() -> usize { Some(2) } +} +impl Int2 for PROPATTR {} + +pub struct PROPVALUE {} +impl Record for PROPVALUE { + fn tag() -> u16 { RTAG_PROPVALUE } + fn expected_size() -> usize { Some(2) } +} +impl ASCII for PROPVALUE {} + +pub struct BOX {} +impl Record for BOX { + fn tag() -> u16 { RTAG_BOX } + fn expected_size() -> usize { Some(0) } +} + +pub struct BOXTYPE {} +impl Record for BOXTYPE { + fn tag() -> u16 { RTAG_BOXTYPE } + fn expected_size() -> usize { Some(2) } +} +impl Int2 for BOXTYPE {} + +pub struct PLEX {} +impl Record for PLEX { + fn tag() -> u16 { 
RTAG_PLEX } + fn expected_size() -> usize { Some(4) } +} +impl Int4 for PLEX {} + +pub struct BGNEXTN {} +impl Record for BGNEXTN { + fn tag() -> u16 { RTAG_BGNEXTN } + fn expected_size() -> usize { Some(4) } +} +impl Int4 for BGNEXTN {} + +pub struct ENDEXTN {} +impl Record for ENDEXTN { + fn tag() -> u16 { RTAG_ENDEXTN } + fn expected_size() -> usize { Some(4) } +} +impl Int4 for ENDEXTN {} + +pub struct TAPENUM {} +impl Record for TAPENUM { + fn tag() -> u16 { RTAG_TAPENUM } + fn expected_size() -> usize { Some(2) } +} +impl Int2 for TAPENUM {} + +pub struct TAPECODE {} +impl Record for TAPECODE { + fn tag() -> u16 { RTAG_TAPECODE } + fn expected_size() -> usize { Some(2 * 6) } +} +impl Int2Array for TAPECODE {} + +pub struct STRCLASS {} +impl Record for STRCLASS { + fn tag() -> u16 { RTAG_STRCLASS } + fn expected_size() -> usize { Some(2) } +} +impl Int2 for STRCLASS {} + +pub struct RESERVED {} +impl Record for RESERVED { + fn tag() -> u16 { RTAG_RESERVED } + fn expected_size() -> usize { Some(2) } +} +impl Int2Array for RESERVED {} + +pub struct FORMAT {} +impl Record for FORMAT { + fn tag() -> u16 { RTAG_FORMAT } + fn expected_size() -> usize { Some(2) } +} +impl Int2 for FORMAT {} + +pub struct MASK {} +impl Record for MASK { + fn tag() -> u16 { RTAG_MASK } + fn expected_size() -> usize { None } +} +impl ASCII for MASK {} + +pub struct ENDMASKS {} +impl Record for ENDMASKS { + // End of MASKS records + fn tag() -> u16 { RTAG_ENDMASKS } + fn expected_size() -> usize { Some(0) } +} + +pub struct LIBDIRSIZE {} +impl Record for LIBDIRSIZE { + fn tag() -> u16 { RTAG_LIBDIRSIZE } + fn expected_size() -> usize { Some(2) } +} +impl Int2 for LIBDIRSIZE {} + +pub struct SRFNAME {} +impl Record for SRFNAME { + fn tag() -> u16 { RTAG_SRFNAME } + fn expected_size() -> usize { None } +} +impl ASCII for SRFNAME {} + +pub struct LIBSECUR {} +impl Record for LIBSECUR { + fn tag() -> u16 { RTAG_LIBSECUR } + fn expected_size() -> usize { Some(2) } +} +impl Int2 for LIBSECUR 
{} + +pub struct BORDER {} +impl Record for BORDER { + fn tag() -> u16 { RTAG_BORDER } + fn expected_size() -> usize { Some(0) } +} + +pub struct SOFTFENCE {} +impl Record for SOFTFENCE { + fn tag() -> u16 { RTAG_SOFTFENCE } + fn expected_size() -> usize { Some(0) } +} + +pub struct HARDFENCE {} +impl Record for HARDFENCE { + fn tag() -> u16 { RTAG_HARDFENCE } + fn expected_size() -> usize { Some(0) } +} + +pub struct SOFTWIRE {} +impl Record for SOFTWIRE { + fn tag() -> u16 { RTAG_SOFTWIRE } + fn expected_size() -> usize { Some(0) } +} + +pub struct HARDWIRE {} +impl Record for HARDWIRE { + fn tag() -> u16 { RTAG_HARDWIRE } + fn expected_size() -> usize { Some(0) } +} + +pub struct PATHPORT {} +impl Record for PATHPORT { + fn tag() -> u16 { RTAG_PATHPORT } + fn expected_size() -> usize { Some(0) } +} + +pub struct NODEPORT {} +impl Record for NODEPORT { + fn tag() -> u16 { RTAG_NODEPORT } + fn expected_size() -> usize { Some(0) } +} + +pub struct USERCONSTRAINT {} +impl Record for USERCONSTRAINT { + fn tag() -> u16 { RTAG_USERCONSTRAINT } + fn expected_size() -> usize { Some(0) } +} + +pub struct SPACERERROR {} +impl Record for SPACERERROR { + fn tag() -> u16 { RTAG_SPACERERROR } + fn expected_size() -> usize { Some(0) } +} + +pub struct CONTACT {} +impl Record for CONTACT { + fn tag() -> u16 { RTAG_CONTACT } + fn expected_size() -> usize { Some(0) } +} diff --git a/src/test_basic.rs b/src/test_basic.rs new file mode 100644 index 0000000..6bd8bee --- /dev/null +++ b/src/test_basic.rs @@ -0,0 +1,121 @@ +/* +import struct + +import pytest # type: ignore +import numpy # type: ignore +from numpy.testing import assert_array_equal # type: ignore + +from .basic import parse_bitarray, parse_int2, parse_int4, parse_real8, parse_ascii +from .basic import pack_bitarray, pack_int2, pack_int4, pack_real8, pack_ascii +from .basic import decode_real8, encode_real8 + +from .basic import KlamathError + + +def test_parse_bitarray(): + assert(parse_bitarray(b'59') == 13625) + 
assert(parse_bitarray(b'\0\0') == 0) + assert(parse_bitarray(b'\xff\xff') == 65535) + + # 4 bytes (too long) + with pytest.raises(KlamathError): + parse_bitarray(b'4321') + + # empty data + with pytest.raises(KlamathError): + parse_bitarray(b'') + + +def test_parse_int2(): + assert_array_equal(parse_int2(b'59\xff\xff\0\0'), (13625, -1, 0)) + + # odd length + with pytest.raises(KlamathError): + parse_int2(b'54321') + + # empty data + with pytest.raises(KlamathError): + parse_int2(b'') + + +def test_parse_int4(): + assert_array_equal(parse_int4(b'4321'), (875770417,)) + + # length % 4 != 0 + with pytest.raises(KlamathError): + parse_int4(b'654321') + + # empty data + with pytest.raises(KlamathError): + parse_int4(b'') + + +def test_decode_real8(): + # zeroes + assert(decode_real8(numpy.array([0x0])) == 0) + assert(decode_real8(numpy.array([1<<63])) == 0) # negative + assert(decode_real8(numpy.array([0xff << 56])) == 0) # denormalized + + assert(decode_real8(numpy.array([0x4110 << 48])) == 1.0) + assert(decode_real8(numpy.array([0xC120 << 48])) == -2.0) + + +def test_parse_real8(): + packed = struct.pack('>3Q', 0x0, 0x4110_0000_0000_0000, 0xC120_0000_0000_0000) + assert_array_equal(parse_real8(packed), (0.0, 1.0, -2.0)) + + # length % 8 != 0 + with pytest.raises(KlamathError): + parse_real8(b'0987654321') + + # empty data + with pytest.raises(KlamathError): + parse_real8(b'') + + +def test_parse_ascii(): + # empty data + with pytest.raises(KlamathError): + parse_ascii(b'') + + assert(parse_ascii(b'12345') == b'12345') + assert(parse_ascii(b'12345\0') == b'12345') # strips trailing null byte + + +def test_pack_bitarray(): + packed = pack_bitarray(321) + assert(len(packed) == 2) + assert(packed == struct.pack('>H', 321)) + + +def test_pack_int2(): + packed = pack_int2((3, 2, 1)) + assert(len(packed) == 3*2) + assert(packed == struct.pack('>3h', 3, 2, 1)) + assert(pack_int2([-3, 2, -1]) == struct.pack('>3h', -3, 2, -1)) + + +def test_pack_int4(): + packed = pack_int4((3, 
2, 1)) + assert(len(packed) == 3*4) + assert(packed == struct.pack('>3l', 3, 2, 1)) + assert(pack_int4([-3, 2, -1]) == struct.pack('>3l', -3, 2, -1)) + + +def test_encode_real8(): + assert(encode_real8(numpy.array([0.0])) == 0) + arr = numpy.array((1.0, -2.0, 1e-9, 1e-3, 1e-12)) + assert_array_equal(decode_real8(encode_real8(arr)), arr) + + +def test_pack_real8(): + reals = (0, 1, -1, 0.5, 1e-9, 1e-3, 1e-12) + packed = pack_real8(reals) + assert(len(packed) == len(reals) * 8) + assert_array_equal(parse_real8(packed), reals) + + +def test_pack_ascii(): + assert(pack_ascii(b'4321') == b'4321') + assert(pack_ascii(b'321') == b'321\0') +*/ From e0a634f47665427046499f8afd3c1cbfadef058a Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Sat, 18 Dec 2021 21:17:43 -0800 Subject: [PATCH 02/31] add gitignore --- .gitignore | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5d80173 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +/target/ +**/*.rs.bk +Cargo.lock + +*.swp +*.swo From 00a29941b7f393a05f03a3f4760e627f1972511d Mon Sep 17 00:00:00 2001 From: jan Date: Sun, 20 Mar 2022 16:28:34 -0700 Subject: [PATCH 03/31] index on (no branch): 48db472 snapshot 2021-12-18 21:05:00.635887 From 02e483c8cdaba081518076aeeecaba41265225f7 Mon Sep 17 00:00:00 2001 From: jan Date: Sun, 8 May 2022 16:41:43 -0700 Subject: [PATCH 04/31] update to 2021 edition --- Cargo.toml | 3 ++- src/__init__.rs | 29 ----------------------------- src/elements.rs | 8 ++++---- src/lib.rs | 1 - src/library.rs | 27 ++++++++++++++------------- src/record.rs | 8 ++++---- src/records.rs | 2 +- 7 files changed, 25 insertions(+), 53 deletions(-) delete mode 100644 src/__init__.rs diff --git a/Cargo.toml b/Cargo.toml index 414196a..01429e9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,8 @@ name = "rs-klamath" version = "0.1.0" authors = ["jan "] +edition = "2021" [dependencies] byteorder = "^1" -nom = "^7" +#nom = 
"^7" diff --git a/src/__init__.rs b/src/__init__.rs deleted file mode 100644 index 44d6c6b..0000000 --- a/src/__init__.rs +++ /dev/null @@ -1,29 +0,0 @@ -/* - * `klamath` is a Python module for reading and writing to the GDSII file format. - * - * The goal is to keep this library simple: - * - Map data types directly wherever possible. - * * Presents an accurate representation of what is saved to the file. - * * Avoids excess copies / allocations for speed. - * * No "automatic" error checking, except when casting datatypes. - * If data integrity checks are provided at all, they must be - * explicitly run by the caller. - * - Low-level functionality is first-class. - * * Meant for use-cases where the caller wants to read or write - * individual GDS records. - * * Offers complete control over the written file. - * - Opinionated and limited high-level functionality. - * * Discards or ignores rarely-encountered data types. - * * Keeps functions simple and reusable. - * * Only de/encodes the file format, doesn't provide tools to modify - * the data itself. - * * Still requires explicit values for most fields. - * - No compilation - * * Uses `numpy` for speed, since it's commonly available / pre-built. - * * Building this library should not require a compiler. - * - * `klamath` was built to provide a fast and versatile GDS interface for - * [masque](https://mpxd.net/code/jan/masque), which provides higher-level - * tools for working with hierarchical design data and supports multiple - * file formats. - */ diff --git a/src/elements.rs b/src/elements.rs index 8c2e35b..516a622 100644 --- a/src/elements.rs +++ b/src/elements.rs @@ -3,16 +3,16 @@ /// structure references) and associated properties. 
/// -use records::{BOX, BOUNDARY, NODE, PATH, TEXT, SREF, AREF, +use crate::records::{BOX, BOUNDARY, NODE, PATH, TEXT, SREF, AREF, DATATYPE, PATHTYPE, BOXTYPE, NODETYPE, TEXTTYPE, LAYER, XY, WIDTH, COLROW, PRESENTATION, STRING, STRANS, MAG, ANGLE, PROPATTR, PROPVALUE, ENDEL, BGNEXTN, ENDEXTN, SNAME, }; -use records; -use record::{RecordHeader, Record}; -use basic::{OResult, IResult, fail}; +use crate::records; +use crate::record::{RecordHeader, Record}; +use crate::basic::{OResult, IResult, fail}; use std::collections::HashMap; use std::io::Write; diff --git a/src/lib.rs b/src/lib.rs index b17d455..dbaf483 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,4 @@ //#![feature(generic_associated_types)] -#![feature(destructuring_assignment)] extern crate byteorder; diff --git a/src/library.rs b/src/library.rs index 06ff7ba..84d74dc 100644 --- a/src/library.rs +++ b/src/library.rs @@ -5,12 +5,12 @@ use std::io::Write; use std::collections::HashMap; -use record; -use record::{RecordHeader, Record}; -use records; -use elements; -use elements::{Element}; -use basic::{IResult, OResult, take_bytes, fail}; +use crate::record; +use crate::record::{RecordHeader, Record}; +use crate::records; +use crate::elements; +use crate::elements::{Element}; +use crate::basic::{IResult, OResult, take_bytes, fail}; const DEFAULT_DATE: [i16; 6] = [1900, 0, 0, 0, 0, 0]; @@ -289,6 +289,7 @@ impl Cell { } +/* /// /// Scan through a GDS file, building a table of instance counts /// `{b'structure_name': {b'ref_name': count}}`. 
@@ -304,8 +305,10 @@ impl Cell { pub fn scan_hierarchy(input: &[u8]) -> IResult, HashMap::, u32>>> { let mut structures = HashMap::new(); - + let mut ref_name = None; + let mut ref_count = None; let (mut input, mut header) = RecordHeader::read(input)?; + let mut cur_structure = HashMap::new(); while header.tag != records::RTAG_ENDLIB { match header.tag { records::RTAG_BGNSTR => { @@ -317,8 +320,6 @@ pub fn scan_hierarchy(input: &[u8]) -> IResult, HashMap:: IResult, HashMap:: IResult, u32))> { let (input, found_struc) = records::BGNSTR.skip_past(input)?; - if not found_struc { + if !found_struc { return Ok((input, None)) } let mut cur_structure = HashMap::new(); @@ -390,3 +390,4 @@ pub fn count_ref(input: &[u8]) -> IResult, u32))> { structures.insert(name, cur_structure); (input, header) = RecordHeader::read(input1)?; } +*/ diff --git a/src/record.rs b/src/record.rs index 8d01c93..7d04085 100644 --- a/src/record.rs +++ b/src/record.rs @@ -6,10 +6,10 @@ use std::convert::TryInto; use byteorder::{ByteOrder, BigEndian}; -use basic::{pack_datetime, pack_bitarray, pack_ascii, pack_int2, pack_int4, pack_real8}; #[warn(unused_imports)] -use basic::{parse_datetime, parse_bitarray, parse_ascii, parse_int2, parse_int4, parse_real8}; #[warn(unused_imports)] -use basic::{OResult, IResult, fail, parse_u16, take_bytes}; -use records; +use crate::basic::{pack_datetime, pack_bitarray, pack_ascii, pack_int2, pack_int4, pack_real8}; #[warn(unused_imports)] +use crate::basic::{parse_datetime, parse_bitarray, parse_ascii, parse_int2, parse_int4, parse_real8}; #[warn(unused_imports)] +use crate::basic::{OResult, IResult, fail, parse_u16, take_bytes}; +use crate::records; //#[no_mangle] diff --git a/src/records.rs b/src/records.rs index 972cc78..21a5bb0 100644 --- a/src/records.rs +++ b/src/records.rs @@ -2,7 +2,7 @@ /// Record type and tag definitions /// -use record::{Record, Int2, Int4, Int2Array, Int4Array, Real8, Real8Pair, DateTimePair, BitArray, ASCII, Empty}; +use 
crate::record::{Record, Int2, Int4, Int2Array, Int4Array, Real8, Real8Pair, DateTimePair, BitArray, ASCII, Empty}; //use std::io::Write; From 921afa556958054e60fe86ccb706b9ec4439bb32 Mon Sep 17 00:00:00 2001 From: jan Date: Fri, 20 May 2022 19:44:17 -0700 Subject: [PATCH 05/31] whitespace --- src/elements.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/elements.rs b/src/elements.rs index 516a622..b711da2 100644 --- a/src/elements.rs +++ b/src/elements.rs @@ -4,11 +4,11 @@ /// use crate::records::{BOX, BOUNDARY, NODE, PATH, TEXT, SREF, AREF, - DATATYPE, PATHTYPE, BOXTYPE, NODETYPE, TEXTTYPE, - LAYER, XY, WIDTH, COLROW, PRESENTATION, STRING, - STRANS, MAG, ANGLE, PROPATTR, PROPVALUE, - ENDEL, BGNEXTN, ENDEXTN, SNAME, - }; + DATATYPE, PATHTYPE, BOXTYPE, NODETYPE, TEXTTYPE, + LAYER, XY, WIDTH, COLROW, PRESENTATION, STRING, + STRANS, MAG, ANGLE, PROPATTR, PROPVALUE, + ENDEL, BGNEXTN, ENDEXTN, SNAME, + }; use crate::records; use crate::record::{RecordHeader, Record}; From e5e0adab7119bd0899059a4b20057e9326a52f3e Mon Sep 17 00:00:00 2001 From: jan Date: Tue, 17 Dec 2024 18:38:40 -0800 Subject: [PATCH 06/31] fixup imports/exports --- src/basic.rs | 12 ++++++------ src/library.rs | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/basic.rs b/src/basic.rs index d8fa7da..cd22d6f 100644 --- a/src/basic.rs +++ b/src/basic.rs @@ -232,7 +232,7 @@ pub fn encode_real8(fnum: f64) -> u64 { mod tests { #[test] fn test_parse_bitarray() { - use basic::parse_bitarray; + use crate::basic::parse_bitarray; //assert!(parse_bitarray(b"59") == 13625); assert_eq!(parse_bitarray(b"\x00\x00").unwrap().1, [false; 16]); @@ -251,7 +251,7 @@ mod tests { #[test] fn test_parse_int2() { - use basic::parse_int2; + use crate::basic::parse_int2; assert_eq!(parse_int2(b"59").unwrap().1, 13625); assert_eq!(parse_int2(b"\0\0").unwrap().1, 0); assert_eq!(parse_int2(b"\xff\xff").unwrap().1, -1); @@ -259,13 +259,13 @@ mod tests { #[test] fn 
test_parse_int4() { - use basic::parse_int4; + use crate::basic::parse_int4; assert_eq!(parse_int4(b"4321").unwrap().1, 875770417); } #[test] fn test_decode_real8() { - use basic::decode_real8; + use crate::basic::decode_real8; // zeroes assert_eq!(decode_real8(0x0), 0.0); @@ -280,7 +280,7 @@ mod tests { #[test] fn test_parse_real8() { - use basic:: parse_real8; + use crate::basic:: parse_real8; assert_eq!(0.0, parse_real8(&[0; 8]).unwrap().1); assert_eq!(1.0, parse_real8(&[0x41, 0x10, 0, 0, 0, 0, 0, 0]).unwrap().1); @@ -289,7 +289,7 @@ mod tests { #[test] fn test_parse_ascii() { - use basic::parse_ascii; + use crate::basic::parse_ascii; assert_eq!(parse_ascii(b"12345", 5).unwrap().1, b"12345"); assert_eq!(parse_ascii(b"12345\0", 6).unwrap().1, b"12345"); // strips trailing null byte diff --git a/src/library.rs b/src/library.rs index 84d74dc..e5b5758 100644 --- a/src/library.rs +++ b/src/library.rs @@ -5,12 +5,12 @@ use std::io::Write; use std::collections::HashMap; -use crate::record; -use crate::record::{RecordHeader, Record}; -use crate::records; -use crate::elements; -use crate::elements::{Element}; -use crate::basic::{IResult, OResult, take_bytes, fail}; +pub use crate::record; +pub use crate::record::{RecordHeader, Record}; +pub use crate::records; +pub use crate::elements; +pub use crate::elements::{Element}; +pub use crate::basic::{IResult, OResult, take_bytes, fail}; const DEFAULT_DATE: [i16; 6] = [1900, 0, 0, 0, 0, 0]; From 24d8e32173ee8538a9110ca2d4c2129c345a4542 Mon Sep 17 00:00:00 2001 From: jan Date: Tue, 17 Dec 2024 18:38:50 -0800 Subject: [PATCH 07/31] add leading zeros to mask --- src/basic.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/basic.rs b/src/basic.rs index cd22d6f..0899fa9 100644 --- a/src/basic.rs +++ b/src/basic.rs @@ -161,7 +161,7 @@ pub fn encode_real8(fnum: f64) -> u64 { let ieee = fnum.to_bits(); let sign = ieee & 0x8000_0000_0000_0000; let ieee_exp = (ieee >> 52) as i32 & 0x7ff; - let ieee_mant = ieee & 
0xf_ffff_ffff_ffff; + let ieee_mant = ieee & 0x000f_ffff_ffff_ffff; let subnorm = (ieee_exp == 0) & (ieee_mant != 0); if (ieee_exp == 0) & (ieee_mant == 0) { From 6531bec6e5282e86a7e9738b3d65fc2d828958ae Mon Sep 17 00:00:00 2001 From: jan Date: Tue, 17 Dec 2024 18:39:02 -0800 Subject: [PATCH 08/31] ErrType should be Debug --- src/basic.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/basic.rs b/src/basic.rs index 0899fa9..0fa8392 100644 --- a/src/basic.rs +++ b/src/basic.rs @@ -8,6 +8,7 @@ pub type OResult = Result; pub type IResult<'a, O> = Result<(&'a [u8], O), (&'a [u8], ErrType)>; +#[derive(Debug)] pub enum ErrType { Incomplete(Option), Failed(String), From 6186f1d5613a9bb94652598da6f57317e307b95e Mon Sep 17 00:00:00 2001 From: jan Date: Wed, 18 Dec 2024 16:43:41 -0800 Subject: [PATCH 09/31] enable more tests --- src/basic.rs | 81 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 59 insertions(+), 22 deletions(-) diff --git a/src/basic.rs b/src/basic.rs index 0fa8392..72fd03a 100644 --- a/src/basic.rs +++ b/src/basic.rs @@ -297,43 +297,80 @@ mod tests { assert_eq!(parse_ascii(b"123456", 6).unwrap().1, b"123456"); } -/* + #[test] fn test_pack_bitarray() { - packed = pack_bitarray(321) - assert_eq!(len(packed), 2); - assert_eq!(packed, struct.pack('>H', 321)); + use crate::basic::pack_bitarray; + let mut buf = [10; 3]; + let mut bools = [false; 16]; + bools[1] = true; + bools[2] = true; + bools[11] = true; + + pack_bitarray(&mut buf, &bools); + assert_eq!(buf[0], 0b0110_0000); + assert_eq!(buf[1], 0b0001_0000); + assert_eq!(buf[2], 10); } + #[test] fn test_pack_int2() { - packed = pack_int2((3, 2, 1)) - assert(len(packed) == 3*2) - assert(packed == struct.pack('>3h', 3, 2, 1)) - assert(pack_int2([-3, 2, -1]) == struct.pack('>3h', -3, 2, -1)) + use crate::basic::pack_int2; + let mut buf = [10; 3 * 2]; + pack_int2(&mut buf, -3); + pack_int2(&mut buf[2..], 2); + pack_int2(&mut buf[4..], -1); + assert_eq!(buf[0..2], [0xFF, 0xFD]); + 
assert_eq!(buf[2..4], [0x00, 0x02]); + assert_eq!(buf[4..6], [0xFF, 0xFF]); } + #[test] fn test_pack_int4() { - packed = pack_int4((3, 2, 1)) - assert(len(packed) == 3*4) - assert(packed == struct.pack('>3l', 3, 2, 1)) - assert(pack_int4([-3, 2, -1]) == struct.pack('>3l', -3, 2, -1)) + use crate::basic::pack_int4; + let mut buf = [10; 3 * 4]; + pack_int4(&mut buf, -3); + pack_int4(&mut buf[4..], 2); + pack_int4(&mut buf[8..], -1); + assert_eq!(buf[0..4], [0xFF, 0xFF, 0xFF, 0xFD]); + assert_eq!(buf[4..8], [0x00, 0x00, 0x00, 0x02]); + assert_eq!(buf[8..12], [0xFF, 0xFF, 0xFF, 0xFF]); } + #[test] fn test_encode_real8() { - assert(encode_real8(numpy.array([0.0])) == 0) - arr = numpy.array((1.0, -2.0, 1e-9, 1e-3, 1e-12)) - assert_array_equal(decode_real8(encode_real8(arr)), arr) + use crate::basic::{encode_real8, decode_real8}; + const REALS: [f64; 5] = [1.0, -2.0, 1e-9, 1e-3, 1e-12]; + for vv in REALS { + print!("{vv}\n"); + assert!((decode_real8(encode_real8(vv)) - vv).abs() < f64::EPSILON); + } } + #[test] fn test_pack_real8() { - reals = (0, 1, -1, 0.5, 1e-9, 1e-3, 1e-12) - packed = pack_real8(reals) - assert(len(packed) == len(reals) * 8) - assert_array_equal(parse_real8(packed), reals) + use crate::basic::{pack_real8, parse_real8}; + const COUNT: usize = 7; + const REALS: [f64; COUNT] = [0.0, 1.0, -1.0, 0.5, 1e-9, 1e-3, 1e-12]; + let mut buf = [10; 8 * COUNT]; + for (ii, &vv) in REALS.iter().enumerate() { + pack_real8(&mut buf[ii * 8..], vv); + } + for (ii, &vv) in REALS.iter().enumerate() { + print!("{vv}\n"); + let parsed_val = parse_real8(&buf[ii * 8..]).unwrap().1; + assert!((parsed_val - vv).abs() < f64::EPSILON); + } } + #[test] fn test_pack_ascii() { - assert(pack_ascii(b'4321') == b'4321') - assert(pack_ascii(b'321') == b'321\0') + use crate::basic::pack_ascii; + let mut buf = [10; 12]; + pack_ascii(&mut buf[0..], "4321".as_bytes()); + pack_ascii(&mut buf[6..], "321".as_bytes()); + assert_eq!(&buf[0..4], "4321".as_bytes()); + assert_eq!(&buf[4..6], [10, 
10]); + assert_eq!(&buf[6..9], "321".as_bytes()); + assert_eq!(&buf[9..], [0, 10, 10]); } -*/ } From b049f70dce1a20899340f33e060fa0c60e0e1876 Mon Sep 17 00:00:00 2001 From: jan Date: Wed, 18 Dec 2024 16:43:50 -0800 Subject: [PATCH 10/31] check_size should be public --- src/records.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/records.rs b/src/records.rs index 21a5bb0..4fe21fa 100644 --- a/src/records.rs +++ b/src/records.rs @@ -290,7 +290,7 @@ impl Record for REFLIBS { fn expected_size() -> Option { None } } impl REFLIBS { - fn check_size(actual_size: usize) -> Result<(), String> { + pub fn check_size(actual_size: usize) -> Result<(), String> { if actual_size % 44 == 0 { Ok(()) } else { @@ -305,7 +305,7 @@ impl Record for FONTS { fn expected_size() -> Option { None } } impl FONTS { - fn check_size(actual_size: usize) -> Result<(), String> { + pub fn check_size(actual_size: usize) -> Result<(), String> { if actual_size % 44 == 0 { Ok(()) } else { @@ -332,7 +332,7 @@ impl Record for ATTRTABLE { fn expected_size() -> Option { None } } impl ATTRTABLE { - fn check_size(actual_size: usize) -> Result<(), String> { + pub fn check_size(actual_size: usize) -> Result<(), String> { if actual_size % 44 == 0 { Ok(()) } else { From a1e9a3f90ca0e8d832ef2ad39618c0096e9f4c28 Mon Sep 17 00:00:00 2001 From: jan Date: Wed, 18 Dec 2024 16:44:05 -0800 Subject: [PATCH 11/31] fix incomplete condition --- src/basic.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/basic.rs b/src/basic.rs index 72fd03a..0bd0636 100644 --- a/src/basic.rs +++ b/src/basic.rs @@ -25,7 +25,7 @@ pub fn incomplete(input: &[u8], size: Option) -> IResult { pub fn take_bytes>(input: &[u8], count: CC) -> IResult<&[u8]> { let cc = count.into(); - if input.len() > cc { + if input.len() < cc { incomplete(input, Some(cc)) } else { let (taken, input) = input.split_at(cc); From babd7f14025b15a6aeab621e1d812b3026fba609 Mon Sep 17 00:00:00 2001 From: jan Date: Wed, 18 Dec 
2024 16:51:42 -0800 Subject: [PATCH 12/31] test for panic if float is too large for real8 --- src/basic.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/basic.rs b/src/basic.rs index 0bd0636..016cc6f 100644 --- a/src/basic.rs +++ b/src/basic.rs @@ -275,10 +275,16 @@ mod tests { assert_eq!(decode_real8(0x4110 << 48), 1.0); assert_eq!(decode_real8(0xC120 << 48), -2.0); - - //TODO panics on invalid? } + #[test] + #[should_panic] + fn test_encode_real8_panic() { + use crate::basic::encode_real8; + encode_real8(1e80); + } + + #[test] fn test_parse_real8() { use crate::basic:: parse_real8; From e42ac03c95aa96d38d5637f3e639fe3d8703ad3e Mon Sep 17 00:00:00 2001 From: jan Date: Wed, 18 Dec 2024 17:27:10 -0800 Subject: [PATCH 13/31] add FloatTooBigError --- src/basic.rs | 39 +++++++++++++++++++++++++-------------- src/record.rs | 6 +++--- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/src/basic.rs b/src/basic.rs index 016cc6f..1019be1 100644 --- a/src/basic.rs +++ b/src/basic.rs @@ -3,6 +3,7 @@ /// use byteorder::{ByteOrder, BigEndian}; use std::io; +use std::fmt; pub type OResult = Result; @@ -14,7 +15,6 @@ pub enum ErrType { Failed(String), } - pub fn fail(input: &[u8], msg: String) -> IResult { Err((input, ErrType::Failed(msg))) } @@ -118,20 +118,21 @@ pub fn bitarray2int(bits: &[bool; 16]) -> u16 { } pub fn pack_bitarray(buf: &mut [u8], bits: &[bool; 16]) { - BigEndian::write_u16(buf, bitarray2int(bits)) + BigEndian::write_u16(buf, bitarray2int(bits)); } pub fn pack_int2(buf: &mut [u8], int: i16) { - BigEndian::write_i16(buf, int) + BigEndian::write_i16(buf, int); } pub fn pack_int4(buf: &mut [u8], int: i32) { - BigEndian::write_i32(buf, int) + BigEndian::write_i32(buf, int); } -pub fn pack_real8(buf: &mut [u8], fnum: f64) { - BigEndian::write_u64(buf, encode_real8(fnum)) +pub fn pack_real8(buf: &mut [u8], fnum: f64) -> Result<(), FloatTooBigError> { + BigEndian::write_u64(buf, encode_real8(fnum)?); + Ok(()) } pub fn 
pack_ascii(buf: &mut [u8], data: &[u8]) -> usize { @@ -156,8 +157,18 @@ pub fn pack_datetime(buf: &mut [u8], date: &[i16; 6]) { } +#[derive(Debug, Clone)] +pub struct FloatTooBigError { + float_value: f64, +} +impl fmt::Display for FloatTooBigError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "Float {0} is too large for Real8", self.float_value) + } +} + /// Convert from float64 to GDS REAL8 representation. -pub fn encode_real8(fnum: f64) -> u64 { +pub fn encode_real8(fnum: f64) -> Result { // Split the ieee float bitfields let ieee = fnum.to_bits(); let sign = ieee & 0x8000_0000_0000_0000; @@ -166,7 +177,7 @@ pub fn encode_real8(fnum: f64) -> u64 { let subnorm = (ieee_exp == 0) & (ieee_mant != 0); if (ieee_exp == 0) & (ieee_mant == 0) { - return 0 + return Ok(0) } // IEEE normal double is (1 + ieee_mant / 2^52) * 2^(ieee_exp - 1023) @@ -207,7 +218,7 @@ pub fn encode_real8(fnum: f64) -> u64 { if gds_exp < -14 { // number is too small - return 0 + return Ok(0) } let neg_biased = gds_exp < 0; @@ -218,13 +229,13 @@ pub fn encode_real8(fnum: f64) -> u64 { let too_big = (gds_exp > 0x7f) & !subnorm; if too_big { - panic!("Number too big for real8 format"); //TODO error handling + return Err(FloatTooBigError{float_value: fnum}); } let gds_exp_bits = (gds_exp as u64) << 56; let real8 = sign | gds_exp_bits | gds_mant; - real8 + Ok(real8) } @@ -281,7 +292,7 @@ mod tests { #[should_panic] fn test_encode_real8_panic() { use crate::basic::encode_real8; - encode_real8(1e80); + encode_real8(1e80).unwrap(); } @@ -348,7 +359,7 @@ mod tests { const REALS: [f64; 5] = [1.0, -2.0, 1e-9, 1e-3, 1e-12]; for vv in REALS { print!("{vv}\n"); - assert!((decode_real8(encode_real8(vv)) - vv).abs() < f64::EPSILON); + assert!((decode_real8(encode_real8(vv).unwrap()) - vv).abs() < f64::EPSILON); } } @@ -359,7 +370,7 @@ mod tests { const REALS: [f64; COUNT] = [0.0, 1.0, -1.0, 0.5, 1e-9, 1e-3, 1e-12]; let mut buf = [10; 8 * COUNT]; for (ii, &vv) in REALS.iter().enumerate() 
{ - pack_real8(&mut buf[ii * 8..], vv); + pack_real8(&mut buf[ii * 8..], vv).unwrap(); } for (ii, &vv) in REALS.iter().enumerate() { print!("{vv}\n"); diff --git a/src/record.rs b/src/record.rs index 7d04085..41921c4 100644 --- a/src/record.rs +++ b/src/record.rs @@ -279,7 +279,7 @@ impl RecordData for Real8 { } fn pack_into(buf: &mut [u8], data: &Self::InData) { - pack_real8(buf, *data) + pack_real8(buf, *data).expect(&format!("Float {0} too big for Real8", data)) } fn pack(data: &Self::InData) -> Self::ByteData { @@ -303,8 +303,8 @@ impl RecordData for Real8Pair { } fn pack_into(buf: &mut [u8], data: &Self::InData) { - pack_real8(&mut buf[8 * 0..], data.0); - pack_real8(&mut buf[8 * 1..], data.1); + pack_real8(&mut buf[8 * 0..], data.0).expect(&format!("Float.0 {0} too big for Real8", data.0)); + pack_real8(&mut buf[8 * 1..], data.1).expect(&format!("Float.1 {0} too big for Real8", data.1)); } fn pack(data: &Self::InData) -> Self::ByteData { From ad1c2f1c35ed96b9878631706b59f60acafd720b Mon Sep 17 00:00:00 2001 From: jan Date: Thu, 19 Dec 2024 19:02:55 -0800 Subject: [PATCH 14/31] remove commented code --- src/lib.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index dbaf483..2a34034 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,3 @@ -//#![feature(generic_associated_types)] - extern crate byteorder; pub mod basic; From ba07d253d266b0eb19b64e2663975b772b7198c0 Mon Sep 17 00:00:00 2001 From: jan Date: Sat, 21 Dec 2024 09:50:19 -0800 Subject: [PATCH 15/31] pyo3 variant working --- Cargo.toml | 11 ++- klamath_rs_ext/__init__.py | 6 ++ klamath_rs_ext/basic.py | 56 +++++++++++++ klamath_rs_ext/py.typed | 0 pyproject.toml | 7 ++ src/lib.rs | 156 +++++++++++++++++++++++++++++++++++++ 6 files changed, 234 insertions(+), 2 deletions(-) create mode 100644 klamath_rs_ext/__init__.py create mode 100644 klamath_rs_ext/basic.py create mode 100644 klamath_rs_ext/py.typed create mode 100644 pyproject.toml diff --git a/Cargo.toml b/Cargo.toml 
index 01429e9..41cbb68 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,16 @@ [package] -name = "rs-klamath" +name = "klamath_rs_ext" version = "0.1.0" authors = ["jan "] edition = "2021" + +[lib] +name = "klamath_rs_ext" +crate-type = ["cdylib", "rlib"] + + [dependencies] byteorder = "^1" -#nom = "^7" +pyo3 = "^0" +numpy = "^0" diff --git a/klamath_rs_ext/__init__.py b/klamath_rs_ext/__init__.py new file mode 100644 index 0000000..fa02fb7 --- /dev/null +++ b/klamath_rs_ext/__init__.py @@ -0,0 +1,6 @@ +from .basic import pack_int2 as pack_int2 +from .basic import pack_int4 as pack_int4 + + +__version__ = 0.1 + diff --git a/klamath_rs_ext/basic.py b/klamath_rs_ext/basic.py new file mode 100644 index 0000000..7237b1c --- /dev/null +++ b/klamath_rs_ext/basic.py @@ -0,0 +1,56 @@ +from collections.abc import Sequence + +import numpy +from numpy.typing import NDArray + +from .klamath_rs_ext import arr_to_int2, arr_to_int4 + + +def pack_int2(data: NDArray[numpy.integer] | Sequence[int] | int) -> bytes: + arr = numpy.asarray(data) + + if arr.dtype in ( + numpy.float64, numpy.float32, + numpy.int64, numpy.uint64, + numpy.int32, numpy.uint32, + numpy.int16, numpy.uint16, + ): + arr = numpy.require(arr, requirements=('C_CONTIGUOUS', 'ALIGNED', 'WRITEABLE', 'OWNDATA')) + if arr is data: + arr = numpy.array(arr, copy=True) + arr_to_int2(arr) + i2arr = arr.view('>i2')[::arr.itemsize // 2] + return i2arr.tobytes() + + if arr.dtype == numpy.dtype('>i2'): + return arr.tobytes() + + if (arr > 32767).any() or (arr < -32768).any(): + raise Exception(f'int2 data out of range: {arr}') + + return arr.astype('>i2').tobytes() + + +def pack_int4(data: NDArray[numpy.integer] | Sequence[int] | int) -> bytes: + arr = numpy.asarray(data) + + if arr.dtype in ( + numpy.float64, numpy.float32, + numpy.int64, numpy.uint64, + numpy.int32, numpy.uint32, + ): + arr = numpy.require(arr, requirements=('C_CONTIGUOUS', 'ALIGNED', 'WRITEABLE', 'OWNDATA')) + if arr is data: + arr = numpy.array(arr, 
copy=True) + arr_to_int4(arr) + i4arr = arr.view('>i4')[::arr.itemsize // 4] + return i4arr.tobytes() + + if arr.dtype == numpy.dtype('>i4'): + return arr.tobytes() + + if (arr > 2147483647).any() or (arr < -2147483648).any(): + raise Exception(f'int4 data out of range: {arr}') + + return arr.astype('>i4').tobytes() + diff --git a/klamath_rs_ext/py.typed b/klamath_rs_ext/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..9704ea5 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,7 @@ +[build-system] +requires = ["maturin>1.0,<2.0"] +build-backend = "maturin" + + +[tool.maturin] +features = ["pyo3/extension-module"] diff --git a/src/lib.rs b/src/lib.rs index 2a34034..b64ab5d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,3 +5,159 @@ pub mod record; pub mod records; pub mod elements; pub mod library; + + +//use ndarray; +use numpy::{PyArray1, PyUntypedArray, PyUntypedArrayMethods, PyArrayDescrMethods, PyArrayMethods, dtype}; +use pyo3::prelude::{Python, pymodule, PyModule, PyResult, Bound, wrap_pyfunction, pyfunction, PyModuleMethods, PyAnyMethods}; +use pyo3::exceptions::{PyValueError, PyTypeError}; + + +#[pymodule] +fn klamath_rs_ext(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_function(wrap_pyfunction!(arr_to_int2, m)?)?; + m.add_function(wrap_pyfunction!(arr_to_int4, m)?)?; + Ok(()) +} + + +#[pyfunction] +fn arr_to_int2(py: Python<'_>, pyarr: &Bound<'_, PyUntypedArray>) -> PyResult<()> { + use rust_util::ToInt2BE; + + assert!(pyarr.is_c_contiguous(), "Array must be c-contiguous!"); + + macro_rules! 
i2if { + ( $el_type:expr, $tt:ty ) => { + if $el_type.is_equiv_to(&dtype::<$tt>(py)) { + let arr = pyarr.downcast::>()?; + let mut array = unsafe { arr.as_array_mut() }; + for xx in array.iter_mut() { + *xx = <$tt>::convert_to_i2be(*xx).map_err( + |e| PyValueError::new_err(format!("Invalid value for 2-byte int: {}", e)) + )?; + } + return Ok(()) + } + } + } + + + let el_type = pyarr.dtype(); + i2if!(el_type, f64); + i2if!(el_type, f32); + i2if!(el_type, i64); + i2if!(el_type, u64); + i2if!(el_type, i32); + i2if!(el_type, u32); + i2if!(el_type, i16); + i2if!(el_type, u16); + + Err(PyTypeError::new_err(format!("arr_to_int2 not implemented for type {:?}", el_type))) +} + + +#[pyfunction] +fn arr_to_int4(py: Python<'_>, pyarr: &Bound<'_, PyUntypedArray>) -> PyResult<()> { + use rust_util::ToInt4BE; + + assert!(pyarr.is_c_contiguous(), "Array must be c-contiguous!"); + + macro_rules! i4if { + ( $el_type:expr, $tt:ty ) => { + if $el_type.is_equiv_to(&dtype::<$tt>(py)) { + let arr = pyarr.downcast::>()?; + let mut array = unsafe { arr.as_array_mut() }; + for xx in array.iter_mut() { + *xx = <$tt>::convert_to_i4be(*xx).map_err( + |e| PyValueError::new_err(format!("Invalid value for 4-byte int: {}", e)) + )?; + } + return Ok(()) + } + } + } + + let el_type = pyarr.dtype(); + i4if!(el_type, f64); + i4if!(el_type, f32); + i4if!(el_type, i64); + i4if!(el_type, u64); + i4if!(el_type, i32); + i4if!(el_type, u32); + + Err(PyTypeError::new_err(format!("arr_to_int4 not implemented for type {:?}", el_type))) +} + + +mod rust_util { + use byteorder::{ByteOrder, BigEndian}; + use std::mem::size_of; + + pub trait ToInt2BE { + fn convert_to_i2be(ii: Self) -> Result where Self: Sized; + } + + pub trait ToInt4BE { + fn convert_to_i4be(ii: Self) -> Result where Self: Sized; + } + + macro_rules! 
impl_i2be { + ( $tt:ty ) => { + impl ToInt2BE for $tt { + fn convert_to_i2be(ii: $tt) -> Result<$tt, $tt> { + if ii < i16::MIN as $tt { return Err(ii); } + if ii > i16::MAX as $tt { return Err(ii); } + + let mut buf = [0; size_of::<$tt>()]; + BigEndian::write_i16(&mut buf, ii as i16); + Ok(<$tt>::from_le_bytes(buf)) + } + } + } + } + + macro_rules! impl_i4be { + ( $tt:ty ) => { + impl ToInt4BE for $tt { + fn convert_to_i4be(ii: $tt) -> Result<$tt, $tt> { + if ii < i32::MIN as $tt { return Err(ii); } + if ii > i32::MAX as $tt { return Err(ii); } + + let mut buf = [0; size_of::<$tt>()]; + BigEndian::write_i32(&mut buf, ii as i32); + Ok(<$tt>::from_le_bytes(buf)) + } + } + } + } + + impl_i2be!(f64); + impl_i4be!(f64); + + impl_i2be!(f32); + impl_i4be!(f32); + + impl_i2be!(i64); + impl_i4be!(i64); + impl_i2be!(u64); + impl_i4be!(u64); + + impl_i2be!(i32); + impl_i4be!(i32); + impl_i2be!(u32); + impl_i4be!(u32); + + impl_i2be!(i16); + impl_i2be!(u16); + + // Does not fit + //impl_i4be!(i16); + //impl_i4be!(u16); + // + //impl_i2be!(i8); + //impl_i4be!(i8); + //impl_i2be!(u8); + //impl_i4be!(u8); + +} From 320958d888fcd05b36d9f0fa8b9b29aeddb84582 Mon Sep 17 00:00:00 2001 From: jan Date: Sat, 21 Dec 2024 13:56:51 -0800 Subject: [PATCH 16/31] ctypes approach --- Cargo.toml | 4 +- klamath_rs_ext/__init__.py | 2 +- klamath_rs_ext/basic.py | 71 +++++++++++++++++++++------ pyproject.toml | 32 +++++++++++-- src/lib.rs | 98 +++++++++++--------------------------- 5 files changed, 115 insertions(+), 92 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 41cbb68..a211306 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "klamath_rs_ext" -version = "0.1.0" +version = "0.2.0" authors = ["jan "] edition = "2021" @@ -12,5 +12,3 @@ crate-type = ["cdylib", "rlib"] [dependencies] byteorder = "^1" -pyo3 = "^0" -numpy = "^0" diff --git a/klamath_rs_ext/__init__.py b/klamath_rs_ext/__init__.py index fa02fb7..d042d92 100644 --- a/klamath_rs_ext/__init__.py +++ 
b/klamath_rs_ext/__init__.py @@ -2,5 +2,5 @@ from .basic import pack_int2 as pack_int2 from .basic import pack_int4 as pack_int4 -__version__ = 0.1 +__version__ = 0.2 diff --git a/klamath_rs_ext/basic.py b/klamath_rs_ext/basic.py index 7237b1c..838b42f 100644 --- a/klamath_rs_ext/basic.py +++ b/klamath_rs_ext/basic.py @@ -1,24 +1,68 @@ from collections.abc import Sequence +import ctypes +from pathlib import Path +from itertools import chain import numpy from numpy.typing import NDArray -from .klamath_rs_ext import arr_to_int2, arr_to_int4 +so_path = Path(__file__).resolve().parent / 'libklamath_rs_ext.so' +clib = ctypes.CDLL(so_path) + + +CONV_TABLE_i16 = { + numpy.float64: clib.f64_to_i16, + numpy.float32: clib.f32_to_i16, + numpy.int64: clib.i64_to_i16, + numpy.int32: clib.i32_to_i16, + numpy.int16: clib.i16_to_i16, + numpy.uint64: clib.u64_to_i16, + numpy.uint32: clib.u32_to_i16, + numpy.uint16: clib.u16_to_i16, + } + +CONV_TABLE_i32 = { + numpy.float64: clib.f64_to_i32, + numpy.float32: clib.f32_to_i32, + numpy.int64: clib.i64_to_i32, + numpy.int32: clib.i32_to_i32, + numpy.uint64: clib.u64_to_i32, + numpy.uint32: clib.u32_to_i32, + } + +clib.f64_to_i16.restype = ctypes.c_double +clib.f32_to_i16.restype = ctypes.c_float +clib.i64_to_i16.restype = ctypes.c_int64 +clib.i32_to_i16.restype = ctypes.c_int32 +clib.i16_to_i16.restype = ctypes.c_int16 +clib.u64_to_i16.restype = ctypes.c_uint64 +clib.u32_to_i16.restype = ctypes.c_uint32 +clib.u16_to_i16.restype = ctypes.c_uint16 + +clib.f64_to_i32.restype = ctypes.c_double +clib.f32_to_i32.restype = ctypes.c_float +clib.i64_to_i32.restype = ctypes.c_int64 +clib.i32_to_i32.restype = ctypes.c_int32 +clib.u64_to_i32.restype = ctypes.c_uint64 +clib.u32_to_i32.restype = ctypes.c_uint32 + + +for fn in chain(CONV_TABLE_i16.values(), CONV_TABLE_i32.values()): + fn.argtypes = [ctypes.POINTER(fn.restype), ctypes.c_size_t] + def pack_int2(data: NDArray[numpy.integer] | Sequence[int] | int) -> bytes: arr = numpy.asarray(data) - if 
arr.dtype in ( - numpy.float64, numpy.float32, - numpy.int64, numpy.uint64, - numpy.int32, numpy.uint32, - numpy.int16, numpy.uint16, - ): + if arr.dtype in CONV_TABLE_i16.keys(): arr = numpy.require(arr, requirements=('C_CONTIGUOUS', 'ALIGNED', 'WRITEABLE', 'OWNDATA')) if arr is data: arr = numpy.array(arr, copy=True) - arr_to_int2(arr) + + fn = CONV_TABLE_i16[arr.dtype] + result = fn(arr.ctypes.data_as(fn.argtypes[0]), arr.size) + i2arr = arr.view('>i2')[::arr.itemsize // 2] return i2arr.tobytes() @@ -34,15 +78,14 @@ def pack_int2(data: NDArray[numpy.integer] | Sequence[int] | int) -> bytes: def pack_int4(data: NDArray[numpy.integer] | Sequence[int] | int) -> bytes: arr = numpy.asarray(data) - if arr.dtype in ( - numpy.float64, numpy.float32, - numpy.int64, numpy.uint64, - numpy.int32, numpy.uint32, - ): + if arr.dtype in CONV_TABLE_i32.keys(): arr = numpy.require(arr, requirements=('C_CONTIGUOUS', 'ALIGNED', 'WRITEABLE', 'OWNDATA')) if arr is data: arr = numpy.array(arr, copy=True) - arr_to_int4(arr) + + fn = CONV_TABLE_i32[arr.dtype] + result = fn(arr.ctypes.data_as(fn.argtypes[0]), arr.size) + i4arr = arr.view('>i4')[::arr.itemsize // 4] return i4arr.tobytes() diff --git a/pyproject.toml b/pyproject.toml index 9704ea5..97d888c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,6 +2,32 @@ requires = ["maturin>1.0,<2.0"] build-backend = "maturin" - -[tool.maturin] -features = ["pyo3/extension-module"] +[project] +name = "klamath_rs_ext" +description = "Compiled extensions for klamath GDS library" +#readme = "README.md" +#license = { file = "LICENSE.md" } +authors = [ + { name="Jan Petykiewicz", email="jan@mpxd.net" }, + ] +homepage = "https://mpxd.net/code/jan/klamath-rs" +repository = "https://mpxd.net/code/jan/klamath-rs" +classifiers = [ + "Programming Language :: Python :: 3", + "Development Status :: 4 - Beta", +# "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + 
"Intended Audience :: Manufacturing", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", + "Topic :: Scientific/Engineering :: Electronic Design Automation (EDA)", + ] +requires-python = ">=3.11" +#include = [ +# "LICENSE.md" +# ] +dynamic = ["version"] +dependencies = [ + "cffi", + ] diff --git a/src/lib.rs b/src/lib.rs index b64ab5d..84e8551 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,87 +7,43 @@ pub mod elements; pub mod library; -//use ndarray; -use numpy::{PyArray1, PyUntypedArray, PyUntypedArrayMethods, PyArrayDescrMethods, PyArrayMethods, dtype}; -use pyo3::prelude::{Python, pymodule, PyModule, PyResult, Bound, wrap_pyfunction, pyfunction, PyModuleMethods, PyAnyMethods}; -use pyo3::exceptions::{PyValueError, PyTypeError}; +use rust_util::ToInt2BE; +use rust_util::ToInt4BE; -#[pymodule] -fn klamath_rs_ext(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_function(wrap_pyfunction!(arr_to_int2, m)?)?; - m.add_function(wrap_pyfunction!(arr_to_int4, m)?)?; - Ok(()) -} - - -#[pyfunction] -fn arr_to_int2(py: Python<'_>, pyarr: &Bound<'_, PyUntypedArray>) -> PyResult<()> { - use rust_util::ToInt2BE; - - assert!(pyarr.is_c_contiguous(), "Array must be c-contiguous!"); - - macro_rules! i2if { - ( $el_type:expr, $tt:ty ) => { - if $el_type.is_equiv_to(&dtype::<$tt>(py)) { - let arr = pyarr.downcast::>()?; - let mut array = unsafe { arr.as_array_mut() }; - for xx in array.iter_mut() { - *xx = <$tt>::convert_to_i2be(*xx).map_err( - |e| PyValueError::new_err(format!("Invalid value for 2-byte int: {}", e)) - )?; +macro_rules! 
mkfun { + ( $fname:ident, $tt:ty, $elfn:ident ) => { + #[no_mangle] + pub extern "C" fn $fname(arr: *mut $tt, size: usize) -> $tt { + let sl = unsafe { std::slice::from_raw_parts_mut(arr, size) }; + for xx in sl.iter_mut() { + let res = <$tt>::$elfn(*xx); + match res { + Err(cc) => return cc, + Ok(cc) => { *xx = cc; }, } - return Ok(()) } + 0 as $tt } } - - - let el_type = pyarr.dtype(); - i2if!(el_type, f64); - i2if!(el_type, f32); - i2if!(el_type, i64); - i2if!(el_type, u64); - i2if!(el_type, i32); - i2if!(el_type, u32); - i2if!(el_type, i16); - i2if!(el_type, u16); - - Err(PyTypeError::new_err(format!("arr_to_int2 not implemented for type {:?}", el_type))) } -#[pyfunction] -fn arr_to_int4(py: Python<'_>, pyarr: &Bound<'_, PyUntypedArray>) -> PyResult<()> { - use rust_util::ToInt4BE; +mkfun!(f64_to_i16, f64, convert_to_i2be); +mkfun!(f32_to_i16, f32, convert_to_i2be); +mkfun!(i64_to_i16, i64, convert_to_i2be); +mkfun!(u64_to_i16, u64, convert_to_i2be); +mkfun!(i32_to_i16, i32, convert_to_i2be); +mkfun!(u32_to_i16, u32, convert_to_i2be); +mkfun!(i16_to_i16, i16, convert_to_i2be); +mkfun!(u16_to_i16, u16, convert_to_i2be); - assert!(pyarr.is_c_contiguous(), "Array must be c-contiguous!"); - - macro_rules! 
i4if { - ( $el_type:expr, $tt:ty ) => { - if $el_type.is_equiv_to(&dtype::<$tt>(py)) { - let arr = pyarr.downcast::>()?; - let mut array = unsafe { arr.as_array_mut() }; - for xx in array.iter_mut() { - *xx = <$tt>::convert_to_i4be(*xx).map_err( - |e| PyValueError::new_err(format!("Invalid value for 4-byte int: {}", e)) - )?; - } - return Ok(()) - } - } - } - - let el_type = pyarr.dtype(); - i4if!(el_type, f64); - i4if!(el_type, f32); - i4if!(el_type, i64); - i4if!(el_type, u64); - i4if!(el_type, i32); - i4if!(el_type, u32); - - Err(PyTypeError::new_err(format!("arr_to_int4 not implemented for type {:?}", el_type))) -} +mkfun!(f64_to_i32, f64, convert_to_i4be); +mkfun!(f32_to_i32, f32, convert_to_i4be); +mkfun!(i64_to_i32, i64, convert_to_i4be); +mkfun!(u64_to_i32, u64, convert_to_i4be); +mkfun!(i32_to_i32, i32, convert_to_i4be); +mkfun!(u32_to_i32, u32, convert_to_i4be); mod rust_util { From bac31e2ce634af549f876a1414d2aa40251209f9 Mon Sep 17 00:00:00 2001 From: jan Date: Sat, 21 Dec 2024 15:42:50 -0800 Subject: [PATCH 17/31] CFFI + maturin approach --- klamath_rs_ext/basic.py | 75 +++++++++---------- pyproject.toml | 4 + src/lib.rs | 157 ++++++++++++++++------------------------ 3 files changed, 99 insertions(+), 137 deletions(-) diff --git a/klamath_rs_ext/basic.py b/klamath_rs_ext/basic.py index 838b42f..4540fe6 100644 --- a/klamath_rs_ext/basic.py +++ b/klamath_rs_ext/basic.py @@ -6,62 +6,47 @@ from itertools import chain import numpy from numpy.typing import NDArray -so_path = Path(__file__).resolve().parent / 'libklamath_rs_ext.so' -clib = ctypes.CDLL(so_path) +from .klamath_rs_ext import lib, ffi CONV_TABLE_i16 = { - numpy.float64: clib.f64_to_i16, - numpy.float32: clib.f32_to_i16, - numpy.int64: clib.i64_to_i16, - numpy.int32: clib.i32_to_i16, - numpy.int16: clib.i16_to_i16, - numpy.uint64: clib.u64_to_i16, - numpy.uint32: clib.u32_to_i16, - numpy.uint16: clib.u16_to_i16, + numpy.float64: lib.f64_to_i16, + numpy.float32: lib.f32_to_i16, + numpy.int64: 
lib.i64_to_i16, + numpy.int32: lib.i32_to_i16, + numpy.int16: lib.i16_to_i16, + numpy.uint64: lib.u64_to_i16, + numpy.uint32: lib.u32_to_i16, + numpy.uint16: lib.u16_to_i16, } CONV_TABLE_i32 = { - numpy.float64: clib.f64_to_i32, - numpy.float32: clib.f32_to_i32, - numpy.int64: clib.i64_to_i32, - numpy.int32: clib.i32_to_i32, - numpy.uint64: clib.u64_to_i32, - numpy.uint32: clib.u32_to_i32, + numpy.float64: lib.f64_to_i32, + numpy.float32: lib.f32_to_i32, + numpy.int64: lib.i64_to_i32, + numpy.int32: lib.i32_to_i32, + numpy.uint64: lib.u64_to_i32, + numpy.uint32: lib.u32_to_i32, } -clib.f64_to_i16.restype = ctypes.c_double -clib.f32_to_i16.restype = ctypes.c_float -clib.i64_to_i16.restype = ctypes.c_int64 -clib.i32_to_i16.restype = ctypes.c_int32 -clib.i16_to_i16.restype = ctypes.c_int16 -clib.u64_to_i16.restype = ctypes.c_uint64 -clib.u32_to_i16.restype = ctypes.c_uint32 -clib.u16_to_i16.restype = ctypes.c_uint16 - -clib.f64_to_i32.restype = ctypes.c_double -clib.f32_to_i32.restype = ctypes.c_float -clib.i64_to_i32.restype = ctypes.c_int64 -clib.i32_to_i32.restype = ctypes.c_int32 -clib.u64_to_i32.restype = ctypes.c_uint64 -clib.u32_to_i32.restype = ctypes.c_uint32 - - -for fn in chain(CONV_TABLE_i16.values(), CONV_TABLE_i32.values()): - fn.argtypes = [ctypes.POINTER(fn.restype), ctypes.c_size_t] - def pack_int2(data: NDArray[numpy.integer] | Sequence[int] | int) -> bytes: arr = numpy.asarray(data) - if arr.dtype in CONV_TABLE_i16.keys(): + for dtype in CONV_TABLE_i16.keys(): + if arr.dtype != dtype: + continue + arr = numpy.require(arr, requirements=('C_CONTIGUOUS', 'ALIGNED', 'WRITEABLE', 'OWNDATA')) if arr is data: arr = numpy.array(arr, copy=True) - fn = CONV_TABLE_i16[arr.dtype] - result = fn(arr.ctypes.data_as(fn.argtypes[0]), arr.size) + fn = CONV_TABLE_i16[dtype] + result = fn(ffi.from_buffer(arr), arr.size) + + if result != 0: + raise ValueError(f'Invalid value for conversion to Int2: {result}') i2arr = arr.view('>i2')[::arr.itemsize // 2] return 
i2arr.tobytes() @@ -78,13 +63,19 @@ def pack_int2(data: NDArray[numpy.integer] | Sequence[int] | int) -> bytes: def pack_int4(data: NDArray[numpy.integer] | Sequence[int] | int) -> bytes: arr = numpy.asarray(data) - if arr.dtype in CONV_TABLE_i32.keys(): + for dtype in CONV_TABLE_i32.keys(): + if arr.dtype != dtype: + continue + arr = numpy.require(arr, requirements=('C_CONTIGUOUS', 'ALIGNED', 'WRITEABLE', 'OWNDATA')) if arr is data: arr = numpy.array(arr, copy=True) - fn = CONV_TABLE_i32[arr.dtype] - result = fn(arr.ctypes.data_as(fn.argtypes[0]), arr.size) + fn = CONV_TABLE_i32[dtype] + result = fn(ffi.from_buffer(arr), arr.size) + + if result != 0: + raise ValueError(f'Invalid value for conversion to Int4: {result}') i4arr = arr.view('>i4')[::arr.itemsize // 4] return i4arr.tobytes() diff --git a/pyproject.toml b/pyproject.toml index 97d888c..06aa1af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,3 +31,7 @@ dynamic = ["version"] dependencies = [ "cffi", ] + +[tool.maturin] +bindings = "cffi" + diff --git a/src/lib.rs b/src/lib.rs index 84e8551..cf8365c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,21 +7,21 @@ pub mod elements; pub mod library; -use rust_util::ToInt2BE; -use rust_util::ToInt4BE; +use byteorder::{ByteOrder, BigEndian}; +use std::mem::size_of; -macro_rules! mkfun { - ( $fname:ident, $tt:ty, $elfn:ident ) => { - #[no_mangle] - pub extern "C" fn $fname(arr: *mut $tt, size: usize) -> $tt { - let sl = unsafe { std::slice::from_raw_parts_mut(arr, size) }; +macro_rules! 
impl_i16be { + ( $tt:ty, $arr:ident, $size:ident ) => { + { + let sl = unsafe { std::slice::from_raw_parts_mut($arr, $size) }; for xx in sl.iter_mut() { - let res = <$tt>::$elfn(*xx); - match res { - Err(cc) => return cc, - Ok(cc) => { *xx = cc; }, - } + if *xx < i16::MIN as $tt { return *xx } + if *xx > i16::MAX as $tt { return *xx } + + let mut buf = [0; size_of::<$tt>()]; + BigEndian::write_i16(&mut buf, *xx as i16); + *xx = <$tt>::from_le_bytes(buf); } 0 as $tt } @@ -29,91 +29,58 @@ macro_rules! mkfun { } -mkfun!(f64_to_i16, f64, convert_to_i2be); -mkfun!(f32_to_i16, f32, convert_to_i2be); -mkfun!(i64_to_i16, i64, convert_to_i2be); -mkfun!(u64_to_i16, u64, convert_to_i2be); -mkfun!(i32_to_i16, i32, convert_to_i2be); -mkfun!(u32_to_i16, u32, convert_to_i2be); -mkfun!(i16_to_i16, i16, convert_to_i2be); -mkfun!(u16_to_i16, u16, convert_to_i2be); +macro_rules! impl_i32be { + ( $tt:ty, $arr:ident, $size:ident ) => { + { + let sl = unsafe { std::slice::from_raw_parts_mut($arr, $size) }; + for xx in sl.iter_mut() { + if *xx < i32::MIN as $tt { return *xx } + if *xx > i32::MAX as $tt { return *xx } -mkfun!(f64_to_i32, f64, convert_to_i4be); -mkfun!(f32_to_i32, f32, convert_to_i4be); -mkfun!(i64_to_i32, i64, convert_to_i4be); -mkfun!(u64_to_i32, u64, convert_to_i4be); -mkfun!(i32_to_i32, i32, convert_to_i4be); -mkfun!(u32_to_i32, u32, convert_to_i4be); - - -mod rust_util { - use byteorder::{ByteOrder, BigEndian}; - use std::mem::size_of; - - pub trait ToInt2BE { - fn convert_to_i2be(ii: Self) -> Result where Self: Sized; - } - - pub trait ToInt4BE { - fn convert_to_i4be(ii: Self) -> Result where Self: Sized; - } - - macro_rules! 
impl_i2be { - ( $tt:ty ) => { - impl ToInt2BE for $tt { - fn convert_to_i2be(ii: $tt) -> Result<$tt, $tt> { - if ii < i16::MIN as $tt { return Err(ii); } - if ii > i16::MAX as $tt { return Err(ii); } - - let mut buf = [0; size_of::<$tt>()]; - BigEndian::write_i16(&mut buf, ii as i16); - Ok(<$tt>::from_le_bytes(buf)) - } + let mut buf = [0; size_of::<$tt>()]; + BigEndian::write_i32(&mut buf, *xx as i32); + *xx = <$tt>::from_le_bytes(buf); } + 0 as $tt } } - - macro_rules! impl_i4be { - ( $tt:ty ) => { - impl ToInt4BE for $tt { - fn convert_to_i4be(ii: $tt) -> Result<$tt, $tt> { - if ii < i32::MIN as $tt { return Err(ii); } - if ii > i32::MAX as $tt { return Err(ii); } - - let mut buf = [0; size_of::<$tt>()]; - BigEndian::write_i32(&mut buf, ii as i32); - Ok(<$tt>::from_le_bytes(buf)) - } - } - } - } - - impl_i2be!(f64); - impl_i4be!(f64); - - impl_i2be!(f32); - impl_i4be!(f32); - - impl_i2be!(i64); - impl_i4be!(i64); - impl_i2be!(u64); - impl_i4be!(u64); - - impl_i2be!(i32); - impl_i4be!(i32); - impl_i2be!(u32); - impl_i4be!(u32); - - impl_i2be!(i16); - impl_i2be!(u16); - - // Does not fit - //impl_i4be!(i16); - //impl_i4be!(u16); - // - //impl_i2be!(i8); - //impl_i4be!(i8); - //impl_i2be!(u8); - //impl_i4be!(u8); - } + + +#[no_mangle] +pub extern "C" fn f64_to_i16(arr: *mut f64, size: usize) -> f64 { impl_i16be!(f64, arr, size) } +#[no_mangle] +pub extern "C" fn f64_to_i32(arr: *mut f64, size: usize) -> f64 { impl_i32be!(f64, arr, size) } + +#[no_mangle] +pub extern "C" fn f32_to_i16(arr: *mut f32, size: usize) -> f32 { impl_i16be!(f32, arr, size) } +#[no_mangle] +pub extern "C" fn f32_to_i32(arr: *mut f32, size: usize) -> f32 { impl_i32be!(f32, arr, size) } + +#[no_mangle] +pub extern "C" fn u64_to_i16(arr: *mut u64, size: usize) -> u64 { impl_i16be!(u64, arr, size) } +#[no_mangle] +pub extern "C" fn u64_to_i32(arr: *mut u64, size: usize) -> u64 { impl_i32be!(u64, arr, size) } + +#[no_mangle] +pub extern "C" fn i64_to_i16(arr: *mut i64, size: usize) -> i64 { 
impl_i16be!(i64, arr, size) } +#[no_mangle] +pub extern "C" fn i64_to_i32(arr: *mut i64, size: usize) -> i64 { impl_i32be!(i64, arr, size) } + +#[no_mangle] +pub extern "C" fn u32_to_i16(arr: *mut u32, size: usize) -> u32 { impl_i16be!(u32, arr, size) } +#[no_mangle] +pub extern "C" fn u32_to_i32(arr: *mut u32, size: usize) -> u32 { impl_i32be!(u32, arr, size) } + +#[no_mangle] +pub extern "C" fn i32_to_i16(arr: *mut i32, size: usize) -> i32 { impl_i16be!(i32, arr, size) } +#[no_mangle] +pub extern "C" fn i32_to_i32(arr: *mut i32, size: usize) -> i32 { impl_i32be!(i32, arr, size) } + + +#[no_mangle] +pub extern "C" fn u16_to_i16(arr: *mut u16, size: usize) -> u16 { impl_i16be!(u16, arr, size) } + +#[no_mangle] +pub extern "C" fn i16_to_i16(arr: *mut i16, size: usize) -> i16 { impl_i16be!(i16, arr, size) } + From ec659b559d1b380debea840cab7442e048041110 Mon Sep 17 00:00:00 2001 From: jan Date: Sat, 21 Dec 2024 16:38:56 -0800 Subject: [PATCH 18/31] fixes for cffi approach --- klamath_rs_ext/basic.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/klamath_rs_ext/basic.py b/klamath_rs_ext/basic.py index 4540fe6..aabc246 100644 --- a/klamath_rs_ext/basic.py +++ b/klamath_rs_ext/basic.py @@ -32,7 +32,7 @@ CONV_TABLE_i32 = { def pack_int2(data: NDArray[numpy.integer] | Sequence[int] | int) -> bytes: - arr = numpy.asarray(data) + arr = numpy.atleast_1d(data) for dtype in CONV_TABLE_i16.keys(): if arr.dtype != dtype: @@ -43,7 +43,8 @@ def pack_int2(data: NDArray[numpy.integer] | Sequence[int] | int) -> bytes: arr = numpy.array(arr, copy=True) fn = CONV_TABLE_i16[dtype] - result = fn(ffi.from_buffer(arr), arr.size) + buf = ffi.from_buffer(ffi.typeof(fn).args[0], arr, require_writable=True) + result = fn(buf, arr.size) if result != 0: raise ValueError(f'Invalid value for conversion to Int2: {result}') @@ -61,7 +62,7 @@ def pack_int2(data: NDArray[numpy.integer] | Sequence[int] | int) -> bytes: def pack_int4(data: NDArray[numpy.integer] | 
Sequence[int] | int) -> bytes: - arr = numpy.asarray(data) + arr = numpy.atleast_1d(data) for dtype in CONV_TABLE_i32.keys(): if arr.dtype != dtype: @@ -72,7 +73,8 @@ def pack_int4(data: NDArray[numpy.integer] | Sequence[int] | int) -> bytes: arr = numpy.array(arr, copy=True) fn = CONV_TABLE_i32[dtype] - result = fn(ffi.from_buffer(arr), arr.size) + buf = ffi.from_buffer(ffi.typeof(fn).args[0], arr, require_writable=True) + result = fn(buf, arr.size) if result != 0: raise ValueError(f'Invalid value for conversion to Int4: {result}') From 4a7bc8090a2ffa4580fe40be0eb2155562016568 Mon Sep 17 00:00:00 2001 From: jan Date: Thu, 10 Apr 2025 01:07:11 -0700 Subject: [PATCH 19/31] arrow attempt compiles, nothing tested yet --- Cargo.toml | 1 + src/elements.rs | 579 ++++++++++++++++++++++++++++++++++++++++++++++-- src/library.rs | 11 +- src/record.rs | 108 +++++---- 4 files changed, 640 insertions(+), 59 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a211306..d878d23 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,3 +12,4 @@ crate-type = ["cdylib", "rlib"] [dependencies] byteorder = "^1" +arrow = "*" diff --git a/src/elements.rs b/src/elements.rs index b711da2..479ea3b 100644 --- a/src/elements.rs +++ b/src/elements.rs @@ -3,7 +3,8 @@ /// structure references) and associated properties. 
/// -use crate::records::{BOX, BOUNDARY, NODE, PATH, TEXT, SREF, AREF, +use crate::records::{ + //BOX, BOUNDARY, NODE, PATH, TEXT, SREF, AREF, DATATYPE, PATHTYPE, BOXTYPE, NODETYPE, TEXTTYPE, LAYER, XY, WIDTH, COLROW, PRESENTATION, STRING, STRANS, MAG, ANGLE, PROPATTR, PROPVALUE, @@ -12,10 +13,550 @@ use crate::records::{BOX, BOUNDARY, NODE, PATH, TEXT, SREF, AREF, use crate::records; use crate::record::{RecordHeader, Record}; -use crate::basic::{OResult, IResult, fail}; +use crate::basic::{IResult, fail, take_bytes}; //OResult +use std::string::String; use std::collections::HashMap; -use std::io::Write; +//use std::io::Write; + +use std::sync::Arc; +use arrow::datatypes::{DataType, Field, Fields}; +use arrow::array::{ + StructBuilder, ListBuilder, StringBuilder, ArrayBuilder, Float64Builder, BooleanBuilder, + Int32Builder, Int16Builder, UInt64Builder, UInt32Builder, UInt8Builder, + StructArray, + }; + + +type DListBuilder = ListBuilder>; + + +pub fn read_library(input: &[u8]) -> IResult { + let input_size = input.len(); + + let property_t = DataType::Struct(Fields::from(vec![ + Field::new("key", DataType::Int16, false), + Field::new("value", DataType::Utf8, false), + ])); + + let property_list_t = DataType::List(Arc::new( + Field::new_list_field(property_t, false) + )); + + + let repetition_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("x0", DataType::Int32, false), + Field::new("y0", DataType::Int32, false), + Field::new("x1", DataType::Int32, false), + Field::new("y1", DataType::Int32, false), + Field::new("count0", DataType::Int16, false), + Field::new("count1", DataType::Int16, false), + ])); + + + let ref_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("target", DataType::UInt32, false), + Field::new("invert_y", DataType::Boolean, true), + Field::new("mag", DataType::Float64, true), + Field::new("angle_deg", DataType::Float64, true), + Field::new("x", DataType::Int32, false), + Field::new("y", DataType::Int32, false), + 
Field::new("repetition", repetition_struct_t, true), + Field::new("properties", property_list_t.clone(), true), + ])); + + + let text_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("layer", DataType::UInt16, false), + Field::new("dtype", DataType::UInt16, false), + Field::new("presentation_horiz", DataType::UInt8, true), + Field::new("presentation_vert", DataType::UInt8, true), + Field::new("presentation_font", DataType::UInt8, true), + Field::new("path_type", DataType::Int16, true), + Field::new("width", DataType::Int32, true), + Field::new("invert_y", DataType::Boolean, true), + Field::new("mag", DataType::Float64, true), + Field::new("angle_deg", DataType::Float64, true), + Field::new("x", DataType::Int32, false), + Field::new("y", DataType::Int32, false), + Field::new("string", DataType::Utf8, false), + Field::new("properties", property_list_t.clone(), true), + ])); + + + let coords_t = DataType::List(Arc::new( + Field::new_list_field(DataType::Int32, false) + )); + + let boundary_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("layer", DataType::UInt16, false), + Field::new("dtype", DataType::UInt16, false), + Field::new("xy", coords_t.clone(), false), + Field::new("properties", property_list_t.clone(), true), + ])); + + let path_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("layer", DataType::UInt16, false), + Field::new("dtype", DataType::UInt16, false), + Field::new("path_type", DataType::Int16, false), + Field::new("extension_start", DataType::Int32, true), + Field::new("extension_end", DataType::Int32, true), + Field::new("width", DataType::Int32, false), + Field::new("xy", coords_t.clone(), false), + Field::new("properties", property_list_t.clone(), true), + ])); + + let boxnode_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("layer", DataType::UInt16, false), + Field::new("dtype", DataType::UInt16, false), + Field::new("xy", coords_t.clone(), false), + Field::new("properties", property_list_t.clone(), 
true), + ])); + + + let ref_list_t = DataType::List(Arc::new( + Field::new_list_field(ref_struct_t, false) + )); + + let text_list_t = DataType::List(Arc::new( + Field::new_list_field(text_struct_t, false) + )); + + let boundary_list_t = DataType::List(Arc::new( + Field::new_list_field(boundary_struct_t, false) + )); + + let path_list_t = DataType::List(Arc::new( + Field::new_list_field(path_struct_t, false) + )); + + let boxnode_list_t = DataType::List(Arc::new( + Field::new_list_field(boxnode_struct_t, false) + )); + + + let cell_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("id", DataType::UInt32, false), + Field::new("file_offset", DataType::UInt64, false), + Field::new("refs", ref_list_t, false), + Field::new("boundaries", boundary_list_t, false), + Field::new("paths", path_list_t, false), + Field::new("nodes", boxnode_list_t.clone(), true), + Field::new("boxes", boxnode_list_t.clone(), true), + Field::new("texts", text_list_t, false), + ])); + + let mut lib_builder = StructBuilder::from_fields(vec![ + Field::new("cell_names", DataType::Utf8, false), + Field::new("cells", cell_struct_t, false), + ], + 0, + ); + + let cells_builder = lib_builder.field_builder::(0).unwrap(); + + + let mut names = HashMap::::new(); + + let (mut input, mut header) = RecordHeader::read(input)?; + while header.tag != records::RTAG_ENDLIB { + (input, _) = take_bytes(input, header.data_size)?; + if header.tag == records::RTAG_BGNSTR { + let name_bytes; + (input, name_bytes) = records::STRNAME::read(input)?; + let name = String::from_utf8(name_bytes).unwrap(); + + let next_id = names.len(); + let id = names.entry(name).or_insert(next_id.try_into().unwrap()); + let position = input_size - input.len(); + + let cell_builder = cells_builder.values().as_any_mut().downcast_mut::().unwrap(); + let id_builder = cell_builder.field_builder::(0).unwrap(); + id_builder.append_value(*id); + + let offset_builder = cell_builder.field_builder::(1).unwrap(); + 
offset_builder.append_value(position.try_into().unwrap()); + + (input, _) = read_elements(input, cell_builder, &mut names)?; + + cells_builder.append(true); + } + (input, header) = RecordHeader::read(input)?; + } + + let mut ids: HashMap = names.into_iter().map(|(kk, vv)| (vv, kk)).collect(); + let names_builder = lib_builder.field_builder::(1).unwrap(); + for id in 0..ids.len() { + names_builder.append_value(ids.remove(&id.try_into().unwrap()).unwrap()); + } + + let lib = lib_builder.finish(); + Ok((input, lib)) +} + + +pub fn read_elements<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, names: &mut HashMap) -> IResult<'a, ()> { + let (mut input, mut header) = RecordHeader::read(input)?; + while header.tag != records::RTAG_ENDSTR { + match header.tag { + records::RTAG_BOUNDARY => {(input, _) = read_boundary(input, cell_builder)?;}, + records::RTAG_PATH => {read_path(input, cell_builder)?;}, + records::RTAG_NODE => {read_boxnode(input, cell_builder, header.tag)?;}, + records::RTAG_BOX => {read_boxnode(input, cell_builder, header.tag)?;}, + records::RTAG_TEXT => {read_text(input, cell_builder)?;}, + records::RTAG_SREF => {read_ref(input, cell_builder, header.tag, names)?;}, + records::RTAG_AREF => {read_ref(input, cell_builder, header.tag, names)?;}, + _ => { + // don't care, skip + (input, _) = take_bytes(input, header.data_size)?; + } + } + (input, header) = RecordHeader::read(input)?; + } + Ok((input, ())) +} + +pub fn read_boundary<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResult<'a, ()> { + let boundaries_builder = cell_builder.field_builder::(3).unwrap(); + let boundary_builder = boundaries_builder.values().as_any_mut().downcast_mut::().unwrap(); + + let (input, _) = records::BOUNDARY::read(input)?; + + let (input, layer) = LAYER::skip_and_read(input)?; + let layer_builder = boundary_builder.field_builder::(0).unwrap(); + layer_builder.append_value(layer); + + let (input, dtype) = DATATYPE::read(input)?; + let dtype_builder = 
boundary_builder.field_builder::(1).unwrap(); + dtype_builder.append_value(dtype); + + let xys_builder = boundary_builder.field_builder::(2).unwrap(); + let xy_builder = xys_builder.values().as_any_mut().downcast_mut::().unwrap(); + let (input, xy_iter) = XY::read(input)?; + for xy in xy_iter { + xy_builder.append_value(xy); + } + xys_builder.append(true); + + let props_builder = boundary_builder.field_builder::(3).unwrap(); + let (input, ()) = read_properties(input, props_builder)?; + + boundary_builder.append(true); + boundaries_builder.append(true); + Ok((input, ())) +} + + +pub fn read_path<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResult<'a, ()> { + let paths_builder = cell_builder.field_builder::(4).unwrap(); + let path_builder = paths_builder.values().as_any_mut().downcast_mut::().unwrap(); + + + let (input, _) = records::PATH::read(input)?; + + let (input, layer) = LAYER::skip_and_read(input)?; + let (input, dtype) = DATATYPE::read(input)?; + let layer_builder = path_builder.field_builder::(0).unwrap(); + layer_builder.append_value(layer); + let dtype_builder = path_builder.field_builder::(1).unwrap(); + dtype_builder.append_value(dtype); + + let mut path_type = None; + let mut width = None; + let mut bgn_ext = None; + let mut end_ext = None; + + let (mut input, mut header) = RecordHeader::read(&input)?; + while header.tag != records::RTAG_XY { + match header.tag { + records::RTAG_PATHTYPE => { + let _path_type; + (input, _path_type) = PATHTYPE::read_data(input, header.data_size)?; + path_type = Some(_path_type); + }, + records::RTAG_WIDTH => { + let _width; + (input, _width) = WIDTH::read_data(input, header.data_size)?; + width = Some(_width); + }, + records::RTAG_BGNEXTN => { + let _bgn_ext; + (input, _bgn_ext) = BGNEXTN::read_data(input, header.data_size)?; + bgn_ext = Some(_bgn_ext); + }, + records::RTAG_ENDEXTN => { + let _end_ext; + (input, _end_ext) = ENDEXTN::read_data(input, header.data_size)?; + end_ext = Some(_end_ext); + }, + _ 
=> + return fail(input, format!("Unexpected tag {:04x}", header.tag)), + }; + (input, header) = RecordHeader::read(&input)?; + } + let path_type_builder = path_builder.field_builder::(2).unwrap(); + path_type_builder.append_option(path_type); + let ext0_builder = path_builder.field_builder::(3).unwrap(); + ext0_builder.append_option(bgn_ext); + let ext1_builder = path_builder.field_builder::(4).unwrap(); + ext1_builder.append_option(end_ext); + let width_builder = path_builder.field_builder::(5).unwrap(); + width_builder.append_option(width); + + let xys_builder = path_builder.field_builder::(6).unwrap(); + let (input, xy_iter) = XY::read(input)?; + for xy in xy_iter { + let xy_builder = xys_builder.values().as_any_mut().downcast_mut::().unwrap(); + xy_builder.append_value(xy); + } + xys_builder.append(true); + + let props_builder = path_builder.field_builder::(7).unwrap(); + let (input, ()) = read_properties(input, props_builder)?; + + path_builder.append(true); + paths_builder.append(true); + Ok((input, ())) +} + +pub fn read_boxnode<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag: u16) -> IResult<'a, ()> { + let field_num = match tag { + records::RTAG_NODE => 5, + records::RTAG_BOX => 6, + _ => return fail(input, format!("Unexpected tag {:04x}", tag)), + }; + + let boxnodes_builder = cell_builder.field_builder::(field_num).unwrap(); + let boxnode_builder = boxnodes_builder.values().as_any_mut().downcast_mut::().unwrap(); + + let (input, _) = match tag { + records::RTAG_NODE => records::NODE::read(input)?, + records::RTAG_BOX => records::BOX::read(input)?, + _ => return fail(input, format!("Unexpected tag {:04x}", tag)), + }; + + let layer_builder = boxnode_builder.field_builder::(0).unwrap(); + let (input, layer) = LAYER::skip_and_read(input)?; + layer_builder.append_value(layer); + + let (input, dtype) = match tag { + records::RTAG_NODE => NODETYPE::read(input)?, + records::RTAG_BOX => BOXTYPE::read(input)?, + _ => return fail(input, 
format!("Unexpected tag {:04x}", tag)), + }; + let dtype_builder = boxnode_builder.field_builder::(1).unwrap(); + dtype_builder.append_value(dtype); + + let xys_builder = boxnode_builder.field_builder::(2).unwrap(); + let xy_builder = xys_builder.values().as_any_mut().downcast_mut::().unwrap(); + let (input, xy_iter) = XY::read(input)?; + for xy in xy_iter { + xy_builder.append_value(xy); + } + xys_builder.append(true); + + let props_builder = boxnode_builder.field_builder::(3).unwrap(); + let (input, ()) = read_properties(input, props_builder)?; + + boxnode_builder.append(true); + boxnodes_builder.append(true); + Ok((input, ())) +} + +pub fn read_text<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResult<'a, ()> { + let texts_builder = cell_builder.field_builder::(7).unwrap(); + let text_builder = texts_builder.values().as_any_mut().downcast_mut::().unwrap(); + + + let mut path_type = None; + let mut pres_hori = None; + let mut pres_vert = None; + let mut pres_font = None; + let mut invert_y = None; + let mut width = None; + let mut mag = None; + let mut angle_deg = None; + + let (input, layer) = LAYER::skip_and_read(input)?; + let layer_builder = text_builder.field_builder::(0).unwrap(); + layer_builder.append_value(layer); + + let (input, dtype) = TEXTTYPE::read(input)?; + let dtype_builder = text_builder.field_builder::(1).unwrap(); + dtype_builder.append_value(dtype); + + let (mut input, mut header) = RecordHeader::read(input)?; + while header.tag != records::RTAG_XY { + match header.tag { + // TODO warn if repeat tags? 
+ records::RTAG_PRESENTATION => { + let _presentation; + (input, _presentation) = PRESENTATION::read_data(input, header.data_size)?; + pres_hori = Some(_presentation[14] as u8 * 2 + _presentation[15] as u8); + pres_vert = Some(_presentation[12] as u8 * 2 + _presentation[13] as u8); + pres_font = Some(_presentation[10] as u8 * 2 + _presentation[11] as u8); + }, + records::RTAG_PATHTYPE => { + let _path_type; + (input, _path_type) = PATHTYPE::read_data(input, header.data_size)?; + path_type = Some(_path_type); + }, + records::RTAG_WIDTH => { + let _width; + (input, _width) = WIDTH::read_data(input, header.data_size)?; + width = Some(_width); + }, + records::RTAG_STRANS => { + let strans; + (input, strans) = STRANS::read_data(input, header.data_size)?; + invert_y = Some(strans[0]); + }, + records::RTAG_MAG => { + let _mag; + (input, _mag) = MAG::read_data(input, header.data_size)?; + mag = Some(_mag); + }, + records::RTAG_ANGLE => { + let _angle_deg; + (input, _angle_deg) = ANGLE::read_data(input, header.data_size)?; + angle_deg = Some(_angle_deg); + }, + _ => + return fail(input, format!("Unexpected tag {:04x}", header.tag)), + } + (input, header) = RecordHeader::read(input)?; + } + + let pres_hori_builder = text_builder.field_builder::(2).unwrap(); + pres_hori_builder.append_option(pres_hori); + let pres_vert_builder = text_builder.field_builder::(3).unwrap(); + pres_vert_builder.append_option(pres_vert); + let pres_font_builder = text_builder.field_builder::(4).unwrap(); + pres_font_builder.append_option(pres_font); + let path_type_builder = text_builder.field_builder::(5).unwrap(); + path_type_builder.append_option(path_type); + let width_builder = text_builder.field_builder::(6).unwrap(); + width_builder.append_option(width); + let inv_builder = text_builder.field_builder::(7).unwrap(); + inv_builder.append_option(invert_y); + let mag_builder = text_builder.field_builder::(8).unwrap(); + mag_builder.append_option(mag); + let angle_builder = 
text_builder.field_builder::(9).unwrap(); + angle_builder.append_option(angle_deg); + + let (input, mut xy_iter) = XY::read(input)?; + let x_builder = text_builder.field_builder::(10).unwrap(); + x_builder.append_value(xy_iter.next().unwrap()); + let y_builder = text_builder.field_builder::(11).unwrap(); + y_builder.append_value(xy_iter.next().unwrap()); + + let (input, string_bytes) = STRING::read(input)?; + let string = String::from_utf8(string_bytes).unwrap(); + let string_builder = text_builder.field_builder::(12).unwrap(); + string_builder.append_value(string); + + let props_builder = text_builder.field_builder::(13).unwrap(); + let (input, ()) = read_properties(input, props_builder)?; + + text_builder.append(true); + texts_builder.append(true); + Ok((input, ())) +} + + + +pub fn read_ref<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag: u16, names: &mut HashMap) -> IResult<'a, ()> { + let is_aref = tag == records::RTAG_AREF; + let refs_builder = cell_builder.field_builder::(7).unwrap(); + let ref_builder = refs_builder.values().as_any_mut().downcast_mut::().unwrap(); + + let mut invert_y = None; + let mut mag = None; + let mut angle_deg = None; + let mut colrow = None; + + let (input, struct_name_bytes) = SNAME::skip_and_read(input)?; + let struct_name = String::from_utf8(struct_name_bytes).unwrap(); + let next_id = names.len(); + let id = names.entry(struct_name).or_insert(next_id.try_into().unwrap()); + let target_builder = ref_builder.field_builder::(0).unwrap(); + target_builder.append_value(*id); + + let (mut input, mut header) = RecordHeader::read(input)?; + while header.tag != records::RTAG_XY { + match header.tag { + records::RTAG_STRANS => { + let strans; + (input, strans) = STRANS::read_data(input, header.data_size)?; + invert_y = Some(strans[0]); + }, + records::RTAG_MAG => { + let _mag; + (input, _mag) = MAG::read_data(input, header.data_size)?; + mag = Some(_mag); + }, + records::RTAG_ANGLE => { + let _angle_deg; + (input, _angle_deg) = 
ANGLE::read_data(input, header.data_size)?; + angle_deg = Some(_angle_deg); + }, + records::RTAG_COLROW => { + let mut _colrow; + (input, _colrow) = COLROW::read_data(input, header.data_size)?; + colrow = Some((_colrow.next().unwrap(), _colrow.next().unwrap())); + if !is_aref { + return fail(input, "Got a COLROW record inside an SREF".to_string()); + } + }, + _ => + return fail(input, format!("Unexpected tag {:04x}", header.tag)), + }; + (input, header) = RecordHeader::read(input)?; + } + let inv_builder = ref_builder.field_builder::(1).unwrap(); + inv_builder.append_option(invert_y); + let mag_builder = ref_builder.field_builder::(2).unwrap(); + mag_builder.append_option(mag); + let angle_builder = ref_builder.field_builder::(3).unwrap(); + angle_builder.append_option(angle_deg); + + + let (input, mut xy_iter) = XY::read_data(input, header.data_size)?; + let x_builder = ref_builder.field_builder::(4).unwrap(); + x_builder.append_value(xy_iter.next().unwrap()); + let y_builder = ref_builder.field_builder::(5).unwrap(); + y_builder.append_value(xy_iter.next().unwrap()); + + let rep_builder = ref_builder.field_builder::(6).unwrap(); + if is_aref { + let x0_builder = rep_builder.field_builder::(0).unwrap(); + x0_builder.append_value(xy_iter.next().unwrap()); + let y0_builder = rep_builder.field_builder::(1).unwrap(); + y0_builder.append_value(xy_iter.next().unwrap()); + let x1_builder = rep_builder.field_builder::(2).unwrap(); + x1_builder.append_value(xy_iter.next().unwrap()); + let y1_builder = rep_builder.field_builder::(3).unwrap(); + y1_builder.append_value(xy_iter.next().unwrap()); + + match colrow { + None => return fail(input, "AREF without COLROW before XY".to_string()), + Some((count0, count1)) => { + let count0_builder = rep_builder.field_builder::(4).unwrap(); + count0_builder.append_value(count0); + let count1_builder = rep_builder.field_builder::(5).unwrap(); + count1_builder.append_value(count1); + }, + } + } + rep_builder.append(is_aref); + + let 
props_builder = ref_builder.field_builder::(7).unwrap(); + let (input, ()) = read_properties(input, props_builder)?; + + ref_builder.append(true); + refs_builder.append(true); + Ok((input, ())) +} /// @@ -30,26 +571,35 @@ use std::io::Write; /// Returns: /// propattr: -> propvalue mapping /// -pub fn read_properties(input: &[u8]) -> IResult>> { - let mut properties = HashMap::new(); +pub fn read_properties<'a>(input: &'a [u8], props_builder: &mut DListBuilder) -> IResult<'a, ()> { + let prop_builder = props_builder.values().as_any_mut().downcast_mut::().unwrap(); let (mut input, mut header) = RecordHeader::read(input)?; while header.tag != ENDEL::tag() { if header.tag == PROPATTR::tag() { - let result = PROPATTR::read_data(input, header.data_size)?; - input = result.0; - let key = result.1; - let result = PROPVALUE::read(input)?; - input = result.0; - let value = result.1; - assert!(!properties.contains_key(&key), "Duplicate property key: {}", key); - properties.insert(key, value); + let key; + let value_bytes; + (input, key) = PROPATTR::read_data(input, header.data_size)?; + (input, value_bytes) = PROPVALUE::read(input)?; + + let value = String::from_utf8(value_bytes).unwrap(); + //assert!(!properties.contains_key(&key), "Duplicate property key: {}", key); + + let key_builder = prop_builder.field_builder::(0).unwrap(); + key_builder.append_value(key); + + let val_builder = prop_builder.field_builder::(1).unwrap(); + val_builder.append_value(value); + + prop_builder.append(true); } (input, header) = RecordHeader::read(input)?; } - Ok((input, properties)) + props_builder.append(true); + Ok((input, ())) } +/* /// /// Write element properties. 
@@ -530,3 +1080,4 @@ impl Element for Text { Ok(size) } } +*/ diff --git a/src/library.rs b/src/library.rs index e5b5758..ee392ed 100644 --- a/src/library.rs +++ b/src/library.rs @@ -3,13 +3,13 @@ /// use std::io::Write; -use std::collections::HashMap; +//use std::collections::HashMap; pub use crate::record; pub use crate::record::{RecordHeader, Record}; pub use crate::records; pub use crate::elements; -pub use crate::elements::{Element}; +//pub use crate::elements::{Element}; pub use crate::basic::{IResult, OResult, take_bytes, fail}; @@ -90,7 +90,7 @@ impl FileHeader { } } - +/* /// /// Scan through a GDS file, building a table of /// {b'structure_name': byte_offset}. @@ -122,8 +122,9 @@ pub fn scan_structs(input: &[u8]) -> IResult, usize>> { } Ok((input, positions)) } +*/ - +/* #[derive(Debug, Clone)] pub struct Cell { name: Vec, @@ -287,7 +288,7 @@ impl Cell { Ok(size) } } - +*/ /* /// diff --git a/src/record.rs b/src/record.rs index 41921c4..da2e12b 100644 --- a/src/record.rs +++ b/src/record.rs @@ -8,7 +8,7 @@ use byteorder::{ByteOrder, BigEndian}; use crate::basic::{pack_datetime, pack_bitarray, pack_ascii, pack_int2, pack_int4, pack_real8}; #[warn(unused_imports)] use crate::basic::{parse_datetime, parse_bitarray, parse_ascii, parse_int2, parse_int4, parse_real8}; #[warn(unused_imports)] -use crate::basic::{OResult, IResult, fail, parse_u16, take_bytes}; +use crate::basic::{OResult, IResult, fail, parse_u16, take_bytes}; //ErrType, use crate::records; @@ -23,7 +23,7 @@ pub struct RecordHeader { } impl RecordHeader { - pub fn read(input: &[u8]) -> IResult { + pub fn read<'a>(input: &[u8]) -> IResult { let (input, size) = parse_u16(input)?; let (input, tag) = parse_u16(input)?; Ok((input, RecordHeader{tag:tag, data_size:size - 4})) @@ -45,13 +45,13 @@ impl RecordHeader { pub trait RecordData { - type BareData; + type BareData<'a>; type InData : ?Sized; type ByteData : AsRef<[u8]>; - fn read(input: &[u8], size: u16) -> IResult; + fn read<'a>(input: &'a [u8], 
size: u16) -> IResult<'a, Self::BareData<'a>>; fn pack_into(buf: &mut [u8], data: &Self::InData); - //fn size(data: &Self::BareData) -> u16; + //fn size(data: &Self::BareData<'_>) -> u16; fn pack(data: &Self::InData) -> Self::ByteData; } @@ -79,7 +79,7 @@ pub trait Record { RecordHeader{tag: Self::tag(), data_size: data_size}.write(ww) } - fn read_data(input: &[u8], size: u16) -> IResult { + fn read_data(input: &[u8], size: u16) -> IResult> { RData::read(input, size) } @@ -108,7 +108,7 @@ pub trait Record { Ok((input, true)) } - fn skip_and_read(input: &[u8]) -> IResult { + fn skip_and_read(input: &[u8]) -> IResult> { let (mut input, mut header) = RecordHeader::read(input)?; while header.tag != Self::tag() { (input, _) = take_bytes(input, header.data_size)?; @@ -127,7 +127,7 @@ pub trait Record { } } - fn read(input: &[u8]) -> IResult { + fn read<'a>(input: &'a [u8]) -> IResult<'a, RData::BareData<'a>> { let (input, size) = Self::expect_header(input)?; Self::check_size(size).unwrap(); let (input, data) = Self::read_data(input, size)?; @@ -147,11 +147,11 @@ pub trait Record { pub struct BitArray; impl RecordData for BitArray { - type BareData = [bool; 16]; + type BareData<'a> = [bool; 16]; type InData = [bool; 16]; type ByteData = [u8; 2]; - fn read(input: &[u8], size: u16) -> IResult { + fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { assert!(size == 2); parse_bitarray(input) } @@ -170,11 +170,11 @@ impl RecordData for BitArray { pub struct Int2; impl RecordData for Int2 { - type BareData = i16; + type BareData<'a> = i16; type InData = i16; type ByteData = [u8; 2]; - fn read(input: &[u8], size: u16) -> IResult { + fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { assert!(size == 2); parse_int2(input) } @@ -192,11 +192,11 @@ impl RecordData for Int2 { pub struct Int4; impl RecordData for Int4 { - type BareData = i32; + type BareData<'a> = i32; type InData = i32; type ByteData = [u8; 4]; - fn read(input: &[u8], 
size: u16) -> IResult { + fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { assert!(size == 4); parse_int4(input) } @@ -215,18 +215,15 @@ impl RecordData for Int4 { pub struct Int2Array; impl RecordData for Int2Array { - type BareData = Vec; + type BareData<'a> = Int2ArrayReader<'a>; type InData = [i16]; type ByteData = Vec; - fn read(input: &[u8], size: u16) -> IResult { + fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { assert!(size % 2 == 0, "Record must contain an integer quantity of integers"); - let mut buf = Vec::with_capacity(size as usize / 2); - let mut input = input; - for ii in 0..buf.len() { - (input, buf[ii]) = parse_int2(input)?; - } - Ok((input, buf)) + //let mut input = input; + let (input, bytes) = take_bytes(input, size)?; + Ok((input, Int2ArrayReader{bytes: bytes})) } fn pack_into(buf: &mut [u8], data: &Self::InData) { @@ -240,20 +237,34 @@ impl RecordData for Int2Array { } } +pub struct Int2ArrayReader<'a> { + bytes: &'a [u8], +} +impl Iterator for Int2ArrayReader<'_> { + type Item = i16; + + fn next(&mut self) -> Option { + if self.bytes.len() < 2 { + None + } else { + let (remaining, val) = parse_int2(self.bytes).unwrap(); + self.bytes = remaining; + Some(val) + } + } +} + pub struct Int4Array; impl RecordData for Int4Array { - type BareData = Vec; + type BareData<'a> = Int4ArrayReader<'a>; type InData = [i32]; type ByteData = Vec; - fn read(input: &[u8], size: u16) -> IResult { + fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { assert!(size % 4 == 0, "Record must contain an integer quantity of integers"); - let mut buf = Vec::with_capacity(size as usize / 4); - let mut input = input; - for ii in 0..buf.len() { - (input, buf[ii]) = parse_int4(input)?; - } - Ok((input, buf)) + //let mut input = input; + let (input, bytes) = take_bytes(input, size)?; + Ok((input, Int4ArrayReader{bytes: bytes})) } fn pack_into(buf: &mut [u8], data: &Self::InData) { @@ -267,13 
+278,30 @@ impl RecordData for Int4Array { } } +pub struct Int4ArrayReader<'a> { + bytes: &'a [u8], +} +impl Iterator for Int4ArrayReader<'_> { + type Item = i32; + + fn next(&mut self) -> Option { + if self.bytes.len() < 4 { + None + } else { + let (remaining, val) = parse_int4(self.bytes).unwrap(); + self.bytes = remaining; + Some(val) + } + } +} + pub struct Real8; impl RecordData for Real8 { - type BareData = f64; + type BareData<'a> = f64; type InData = f64; type ByteData = [u8; 8]; - fn read(input: &[u8], size: u16) -> IResult { + fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { assert!(size == 8); parse_real8(input) } @@ -291,11 +319,11 @@ impl RecordData for Real8 { pub struct Real8Pair; impl RecordData for Real8Pair { - type BareData = (f64, f64); + type BareData<'a> = (f64, f64); type InData = (f64, f64); type ByteData = [u8; 2 * 8]; - fn read(input: &[u8], size: u16) -> IResult { + fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { assert!(size == 2 * 8); let (input, data0) = parse_real8(input)?; let (input, data1) = parse_real8(input)?; @@ -321,11 +349,11 @@ impl RecordData for Real8Pair { pub struct ASCII; impl RecordData for ASCII { - type BareData = Vec; + type BareData<'a> = Vec; type InData = [u8]; type ByteData = Vec; - fn read(input: &[u8], size: u16) -> IResult { + fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { parse_ascii(input, size) } @@ -343,11 +371,11 @@ impl RecordData for ASCII { pub struct DateTimePair; impl RecordData for DateTimePair { - type BareData = [[i16; 6]; 2]; + type BareData<'a> = [[i16; 6]; 2]; type InData = [[i16; 6]; 2]; type ByteData = [u8; 2 * 6 * 2]; - fn read(input: &[u8], size: u16) -> IResult { + fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { assert!(size == 2 * 6 * 2); let (input, data0) = parse_datetime(input)?; let (input, data1) = parse_datetime(input)?; @@ -373,11 +401,11 @@ impl RecordData for 
DateTimePair { pub struct Empty; impl RecordData for Empty { - type BareData = (); + type BareData<'a> = (); type InData = (); type ByteData = [u8; 0]; - fn read(input: &[u8], size: u16) -> IResult { + fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { assert!(size == 0); Ok((input, ())) } From 4aa14c4914e42e4eaa31ead81f42b6b123ca15b2 Mon Sep 17 00:00:00 2001 From: jan Date: Fri, 11 Apr 2025 11:18:56 -0700 Subject: [PATCH 20/31] various cleanup and add some example interfaces --- Cargo.toml | 2 +- src/basic.rs | 12 +- src/elements.rs | 179 +------------------- src/iface.rs | 425 ++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 28 ++-- src/library.rs | 214 +++++++++++++++++++++++- src/misc.py | 40 +++++ src/record.rs | 44 ++--- 8 files changed, 721 insertions(+), 223 deletions(-) create mode 100644 src/iface.rs create mode 100644 src/misc.py diff --git a/Cargo.toml b/Cargo.toml index d878d23..c7deda4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,4 +12,4 @@ crate-type = ["cdylib", "rlib"] [dependencies] byteorder = "^1" -arrow = "*" +arrow = "^54" diff --git a/src/basic.rs b/src/basic.rs index 1019be1..47e2427 100644 --- a/src/basic.rs +++ b/src/basic.rs @@ -39,19 +39,19 @@ pub fn take_bytes>(input: &[u8], count: CC) -> IResult<&[u8]> { */ pub fn parse_u16(input: &[u8]) -> IResult { let (input, buf) = take_bytes(input, 2_usize)?; - let val = BigEndian::read_u16(&buf); + let val = BigEndian::read_u16(buf); Ok((input, val)) } pub fn parse_int2(input: &[u8]) -> IResult { let (input, buf) = take_bytes(input, 2_usize)?; - let val = BigEndian::read_i16(&buf); + let val = BigEndian::read_i16(buf); Ok((input, val)) } pub fn parse_int4(input: &[u8]) -> IResult { let (input, buf) = take_bytes(input, 4_usize)?; - let val = BigEndian::read_i32(&buf); + let val = BigEndian::read_i32(buf); Ok((input, val)) } @@ -69,7 +69,7 @@ pub fn decode_real8(int: u64) -> f64 { pub fn parse_real8(input: &[u8]) -> IResult { let (input, buf) = 
take_bytes(input, 8_usize)?; - let data = BigEndian::read_u64(&buf); + let data = BigEndian::read_u64(buf); Ok((input, decode_real8(data))) } @@ -77,8 +77,8 @@ pub fn parse_real8(input: &[u8]) -> IResult { pub fn parse_datetime(input: &[u8]) -> IResult<[i16; 6]> { let mut buf = [0_i16; 6]; let mut input = input; - for ii in 0..6 { - (input, buf[ii]) = parse_int2(input)?; + for bb in &mut buf { + (input, *bb) = parse_int2(input)?; } buf[0] += 1900; // Year is from 1900 Ok((input, buf)) diff --git a/src/elements.rs b/src/elements.rs index 479ea3b..3e41115 100644 --- a/src/elements.rs +++ b/src/elements.rs @@ -19,180 +19,13 @@ use std::string::String; use std::collections::HashMap; //use std::io::Write; -use std::sync::Arc; -use arrow::datatypes::{DataType, Field, Fields}; use arrow::array::{ StructBuilder, ListBuilder, StringBuilder, ArrayBuilder, Float64Builder, BooleanBuilder, - Int32Builder, Int16Builder, UInt64Builder, UInt32Builder, UInt8Builder, - StructArray, + Int32Builder, Int16Builder, UInt32Builder, UInt8Builder, }; -type DListBuilder = ListBuilder>; - - -pub fn read_library(input: &[u8]) -> IResult { - let input_size = input.len(); - - let property_t = DataType::Struct(Fields::from(vec![ - Field::new("key", DataType::Int16, false), - Field::new("value", DataType::Utf8, false), - ])); - - let property_list_t = DataType::List(Arc::new( - Field::new_list_field(property_t, false) - )); - - - let repetition_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("x0", DataType::Int32, false), - Field::new("y0", DataType::Int32, false), - Field::new("x1", DataType::Int32, false), - Field::new("y1", DataType::Int32, false), - Field::new("count0", DataType::Int16, false), - Field::new("count1", DataType::Int16, false), - ])); - - - let ref_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("target", DataType::UInt32, false), - Field::new("invert_y", DataType::Boolean, true), - Field::new("mag", DataType::Float64, true), - Field::new("angle_deg", 
DataType::Float64, true), - Field::new("x", DataType::Int32, false), - Field::new("y", DataType::Int32, false), - Field::new("repetition", repetition_struct_t, true), - Field::new("properties", property_list_t.clone(), true), - ])); - - - let text_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("layer", DataType::UInt16, false), - Field::new("dtype", DataType::UInt16, false), - Field::new("presentation_horiz", DataType::UInt8, true), - Field::new("presentation_vert", DataType::UInt8, true), - Field::new("presentation_font", DataType::UInt8, true), - Field::new("path_type", DataType::Int16, true), - Field::new("width", DataType::Int32, true), - Field::new("invert_y", DataType::Boolean, true), - Field::new("mag", DataType::Float64, true), - Field::new("angle_deg", DataType::Float64, true), - Field::new("x", DataType::Int32, false), - Field::new("y", DataType::Int32, false), - Field::new("string", DataType::Utf8, false), - Field::new("properties", property_list_t.clone(), true), - ])); - - - let coords_t = DataType::List(Arc::new( - Field::new_list_field(DataType::Int32, false) - )); - - let boundary_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("layer", DataType::UInt16, false), - Field::new("dtype", DataType::UInt16, false), - Field::new("xy", coords_t.clone(), false), - Field::new("properties", property_list_t.clone(), true), - ])); - - let path_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("layer", DataType::UInt16, false), - Field::new("dtype", DataType::UInt16, false), - Field::new("path_type", DataType::Int16, false), - Field::new("extension_start", DataType::Int32, true), - Field::new("extension_end", DataType::Int32, true), - Field::new("width", DataType::Int32, false), - Field::new("xy", coords_t.clone(), false), - Field::new("properties", property_list_t.clone(), true), - ])); - - let boxnode_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("layer", DataType::UInt16, false), - Field::new("dtype", 
DataType::UInt16, false), - Field::new("xy", coords_t.clone(), false), - Field::new("properties", property_list_t.clone(), true), - ])); - - - let ref_list_t = DataType::List(Arc::new( - Field::new_list_field(ref_struct_t, false) - )); - - let text_list_t = DataType::List(Arc::new( - Field::new_list_field(text_struct_t, false) - )); - - let boundary_list_t = DataType::List(Arc::new( - Field::new_list_field(boundary_struct_t, false) - )); - - let path_list_t = DataType::List(Arc::new( - Field::new_list_field(path_struct_t, false) - )); - - let boxnode_list_t = DataType::List(Arc::new( - Field::new_list_field(boxnode_struct_t, false) - )); - - - let cell_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("id", DataType::UInt32, false), - Field::new("file_offset", DataType::UInt64, false), - Field::new("refs", ref_list_t, false), - Field::new("boundaries", boundary_list_t, false), - Field::new("paths", path_list_t, false), - Field::new("nodes", boxnode_list_t.clone(), true), - Field::new("boxes", boxnode_list_t.clone(), true), - Field::new("texts", text_list_t, false), - ])); - - let mut lib_builder = StructBuilder::from_fields(vec![ - Field::new("cell_names", DataType::Utf8, false), - Field::new("cells", cell_struct_t, false), - ], - 0, - ); - - let cells_builder = lib_builder.field_builder::(0).unwrap(); - - - let mut names = HashMap::::new(); - - let (mut input, mut header) = RecordHeader::read(input)?; - while header.tag != records::RTAG_ENDLIB { - (input, _) = take_bytes(input, header.data_size)?; - if header.tag == records::RTAG_BGNSTR { - let name_bytes; - (input, name_bytes) = records::STRNAME::read(input)?; - let name = String::from_utf8(name_bytes).unwrap(); - - let next_id = names.len(); - let id = names.entry(name).or_insert(next_id.try_into().unwrap()); - let position = input_size - input.len(); - - let cell_builder = cells_builder.values().as_any_mut().downcast_mut::().unwrap(); - let id_builder = cell_builder.field_builder::(0).unwrap(); - 
id_builder.append_value(*id); - - let offset_builder = cell_builder.field_builder::(1).unwrap(); - offset_builder.append_value(position.try_into().unwrap()); - - (input, _) = read_elements(input, cell_builder, &mut names)?; - - cells_builder.append(true); - } - (input, header) = RecordHeader::read(input)?; - } - - let mut ids: HashMap = names.into_iter().map(|(kk, vv)| (vv, kk)).collect(); - let names_builder = lib_builder.field_builder::(1).unwrap(); - for id in 0..ids.len() { - names_builder.append_value(ids.remove(&id.try_into().unwrap()).unwrap()); - } - - let lib = lib_builder.finish(); - Ok((input, lib)) -} +pub type DListBuilder = ListBuilder>; pub fn read_elements<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, names: &mut HashMap) -> IResult<'a, ()> { @@ -266,7 +99,7 @@ pub fn read_path<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu let mut bgn_ext = None; let mut end_ext = None; - let (mut input, mut header) = RecordHeader::read(&input)?; + let (mut input, mut header) = RecordHeader::read(input)?; while header.tag != records::RTAG_XY { match header.tag { records::RTAG_PATHTYPE => { @@ -292,7 +125,7 @@ pub fn read_path<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu _ => return fail(input, format!("Unexpected tag {:04x}", header.tag)), }; - (input, header) = RecordHeader::read(&input)?; + (input, header) = RecordHeader::read(input)?; } let path_type_builder = path_builder.field_builder::(2).unwrap(); path_type_builder.append_option(path_type); @@ -827,7 +660,7 @@ impl Element for Path { let (input, layer) = LAYER::skip_and_read(input)?; let (input, dtype) = DATATYPE::read(input)?; - let (mut input, mut header) = RecordHeader::read(&input)?; + let (mut input, mut header) = RecordHeader::read(input)?; while header.tag != records::RTAG_XY { match header.tag { records::RTAG_PATHTYPE => @@ -841,7 +674,7 @@ impl Element for Path { _ => return fail(input, format!("Unexpected tag {:04x}", header.tag)), }; - (input, header) = 
RecordHeader::read(&input)?; + (input, header) = RecordHeader::read(input)?; } let (input, xy) = XY::read_data(input, header.data_size)?; let (input, properties) = read_properties(input)?; diff --git a/src/iface.rs b/src/iface.rs new file mode 100644 index 0000000..6e88dce --- /dev/null +++ b/src/iface.rs @@ -0,0 +1,425 @@ +/* + * Shapes: + * layer, dtype (2x i16) + * bounds (4x i32) + * offset/ptr (usize) + * cell_id (usize) + * + * Cell: + * name (??) + * offset (usize) + * len (usize) + * + * Refs: + * name_ind (usize) + * invert (bool) + * mag (f64) + * angle (f64) + * offset/ptr (usize) + * + */ + + +use arrow; +use arrow::array::{LargeStringArray, DictionaryArray, UInt64Array, Int32Array, UInt16Array} +use arrow::builder::{LargeStringDictionaryBuilder, UInt64Builder, Int32Builder, Int64Builder, BooleanBuilder}; +use library::FileHeader; + +use std::collections::HashMap; + +pub use crate::record; +pub use crate::record::{RecordHeader, Record}; +pub use crate::records; +pub use crate::elements; +pub use crate::elements::{Element}; +pub use crate::basic::{IResult, OResult, take_bytes, fail}; + + + +const DEFAULT_DATE: [i16; 6] = [1900, 0, 0, 0, 0, 0]; + +#[derive(Debug, Clone)] +pub struct LibraryBuilder { + cell_name: LargeStringDictionaryBuilder, + cell_offset: UInt64Builder, + + ref_parent: UInt64Builder, + ref_name: LargeStringDictionaryBuilder, + ref_mirror: BooleanBuilder, + ref_mag: Float64Builder, + ref_angle: Float64Builder, + ref_offset: UInt64Builder, + + shape_parent: UInt64Builder, + shape_offset: UInt64Builder, + shape_type: UInt8Builder, + shape_layer: UInt16Builder, + shape_dtype: UInt16Builder, + shape_xmin: Int32Builder, + shape_ymin: Int32Builder, + shape_xmax: Int32Builder, + shape_ymax: Int32Builder, + shape_data: LargeListArrayBuilder, + + text_parent: UInt64Builder, + text_string: LargeStringDictionaryBuilder, + text_x: Int32Builder, + text_y: Int32Builder, + + + + boundaries: Vec, + paths: Vec, + nodes: Vec, + boxes: Vec, + texts: Vec, + 
refs: Vec, +} + +impl Cell { + /// Build an empty cell + pub fn new(name: Vec) -> Self { + Cell{ + name: name, + boundaries: Vec::new(), + paths: Vec::new(), + nodes: Vec::new(), + boxes: Vec::new(), + texts: Vec::new(), + refs: Vec::new(), + } + } + + /// Skip to the next structure and attempt to read it. + /// + /// Args: + /// input: Seekable input to read from. + /// + /// Returns: + /// (name, elements) if a structure was found. + /// None if no structure was found before the end of the library. + /// + pub fn read(input: &[u8]) -> IResult> { + let (input, success) = records::BGNSTR::skip_past(input)?; + if !success { + return Ok((input, None)) + } + + let (input, name) = records::STRNAME::read(input)?; + let mut cell = Cell::new(name); + let (input, _) = cell.read_elements(input)?; + Ok((input, Some(cell))) + } + + /// Read elements from the input until an ENDSTR + /// record is encountered. The ENDSTR record is also + /// consumed. + /// + /// Args: + /// input: Seekable input to read from. + /// + /// Returns: + /// List of element objects. 
+ /// + pub fn read_elements<'a>(&mut self, input: &'a [u8]) -> IResult<'a, ()> { + let (mut input, mut header) = RecordHeader::read(input)?; + while header.tag != records::RTAG_ENDSTR { + match header.tag { + records::RTAG_BOUNDARY => { + let boundary; + (input, _) = records::BOUNDARY::read(input)?; + (input, boundary) = elements::Boundary::read(input)?; + self.boundaries.push(boundary); + }, + records::RTAG_PATH => { + let path; + (input, _) = records::PATH::read(input)?; + (input, path) = elements::Path::read(input)?; + self.paths.push(path); + }, + records::RTAG_NODE => { + let node; + (input, _) = records::NODE::read(input)?; + (input, node) = elements::Node::read(input)?; + self.nodes.push(node); + }, + records::RTAG_BOX => { + let gds_box; + (input, _) = records::BOX::read(input)?; + (input, gds_box) = elements::GDSBox::read(input)?; + self.boxes.push(gds_box); + }, + records::RTAG_TEXT => { + let txt; + (input, _) = records::TEXT::read(input)?; + (input, txt) = elements::Text::read(input)?; + self.texts.push(txt); + }, + records::RTAG_SREF => { + let sref; + (input, _) = records::SREF::read(input)?; + (input, sref) = elements::Reference::read(input)?; + self.refs.push(sref); + }, + records::RTAG_AREF => { + let aref; + (input, _) = records::AREF::read(input)?; + (input, aref) = elements::Reference::read(input)?; + self.refs.push(aref); + }, + _ => { + // don't care, skip + (input, _) = take_bytes(input, header.data_size)?; + } + } + (input, header) = RecordHeader::read(input)?; + } + Ok((input, ())) + } + + /// + /// Write a structure to the provided input. + /// + /// Args: + /// name: Structure name (ascii-encoded). + /// elements: List of Elements containing the geometry and text in this struct. + /// cre_time: Creation time (optional). + /// mod_time: Modification time (optional). 
+ /// + /// Return: + /// Number of bytes written + /// + pub fn write( + &self, + ww: &mut W, + cre_time: Option<[i16; 6]>, + mod_time: Option<[i16; 6]>, + ) -> OResult { + let mut size = 0; + size += records::BGNSTR::write(ww, &[cre_time.unwrap_or(DEFAULT_DATE), + mod_time.unwrap_or(DEFAULT_DATE)])?; + size += records::STRNAME::write(ww, &self.name)?; + size += self.write_elements(ww)?; + size += records::ENDSTR::write(ww, &())?; + Ok(size) + } + + pub fn write_elements(&self, ww: &mut W) -> OResult { + let mut size = 0; + for boundary in &self.boundaries { + size += boundary.write(ww)?; + } + for path in &self.paths { + size += path.write(ww)?; + } + for node in &self.nodes { + size += node.write(ww)?; + } + for gds_box in &self.boxes { + size += gds_box.write(ww)?; + } + for text in &self.texts { + size += text.write(ww)?; + } + for reference in &self.refs { + size += reference.write(ww)?; + } + Ok(size) + } +} + + + +pub fn make_table(input: &[u8]) -> IResult, usize>> { + let input_size = input.len(); + let mut names = LargeStringDictionaryBuilder::new(); + let mut positions = UInt64Builder::new(); + + let (mut input, mut header) = RecordHeader::read(input)?; + while header.tag != records::RTAG_ENDLIB { + (input, let record_bytes) = take_bytes(input, header.data_size)?; + if header.tag == records::RTAG_BGNSTR { + let position = input_size - input.len(); + + let name; + (input, name) = records::STRNAME::read(record_bytes)?; + + names.append(name); + positions.append(position); + + + pub fn read_elements<'a>(&mut self, input: &'a [u8]) -> IResult<'a, ()> { + let (mut input, mut header) = RecordHeader::read(input)?; + while header.tag != records::RTAG_ENDSTR { + match header.tag { + records::RTAG_BOUNDARY => { + let boundary; + (input, _) = records::BOUNDARY::read(input)?; + (input, boundary) = elements::Boundary::read(input)?; + self.boundaries.push(boundary); + }, + records::RTAG_PATH => { + let path; + (input, _) = records::PATH::read(input)?; + (input, path) 
= elements::Path::read(input)?; + self.paths.push(path); + }, + records::RTAG_NODE => { + let node; + (input, _) = records::NODE::read(input)?; + (input, node) = elements::Node::read(input)?; + self.nodes.push(node); + }, + records::RTAG_BOX => { + let gds_box; + (input, _) = records::BOX::read(input)?; + (input, gds_box) = elements::GDSBox::read(input)?; + self.boxes.push(gds_box); + }, + records::RTAG_TEXT => { + let txt; + (input, _) = records::TEXT::read(input)?; + (input, txt) = elements::Text::read(input)?; + self.texts.push(txt); + }, + records::RTAG_SREF => { + let sref; + (input, _) = records::SREF::read(input)?; + (input, sref) = elements::Reference::read(input)?; + self.refs.push(sref); + }, + records::RTAG_AREF => { + let aref; + (input, _) = records::AREF::read(input)?; + (input, aref) = elements::Reference::read(input)?; + self.refs.push(aref); + }, + _ => { + // don't care, skip + (input, _) = take_bytes(input, header.data_size)?; + } + } + (input, header) = RecordHeader::read(input)?; + } + Ok((input, ())) + } + + + + } + (input, header) = RecordHeader::read(input)?; + } + Ok((input, positions)) +} + + + + +pub struct References { + invert_y: Vec, + mag_and_angle_deg: Vec, + xy: Vec, + + names: Vec, + name_inds: Vec, + + has_prop: Vec, + has_rep: Vec, +} + +pub struct Repetitions { + colrow_vecs: Vec, // 4 per + colrow_counts: Vec, // 2 per +} + +pub struct Shapes { + layers_and_dtypes: Vec, // 2 per + xy: Vec, // variable per + has_prop: Vec, +} + + + +pub struct References { + invert_y: bool, + mag: f64, + angle_deg: f64, + + xy: (i32, i32), + + // Use id to look up these... maybe include has_props and has_rep? + struct_name: Vec, + properties: HashMap::>, + + col_vec: (i32 i32), + row_vec: (i32, i32), + colrow: (i16, i16), +} + +pub struct Boundary { + /// (layer, data_type) tuple + layer: (i16, i16), + /// Ordered vertices of the shape. First and last points should be identical. Order x0, y0, x1,... + xy: Vec, + /// Properties for the element. 
+ properties: HashMap::>, +} + +pub struct Path { + /// (layer, data_type) tuple + layer: (i16, i16), + /// End-cap type (0: flush, 1: circle, 2: square, 4: custom) + path_type: i16, + /// Path width + width: i32, + /// Extension when using path_type=4. Ignored otherwise. + extension: (i32, i32), + /// Path centerline coordinates. [x0, y0, x1, y1,...] + xy: Vec, + /// Properties for the element. + properties: HashMap::>, +} + +pub struct GDSBox { + /// (layer, box_type) tuple + layer: (i16, i16), + /// Box coordinates (5 pairs) + xy: Vec, + /// Properties for the element. + properties: HashMap::>, +} +pub struct Node { + /// (layer, box_type) tuple + layer: (i16, i16), + /// 1-50 pairs of coordinates. + xy: Vec, + /// Properties for the element. + properties: HashMap::>, +} +pub struct Text { + /// (layer, node_type) tuple + layer: (i16, i16), + + /// Bit array. Default all zeros. + /// bits 0-1: 00 left/01 center/10 right + /// bits 2-3: 00 top/01 middle/10 bottom + /// bits 4-5: font number + presentation: [bool; 16], + + /// Default 0 + path_type: i16, + /// Default 0 + width: i32, + /// Vertical inversion. Default false. + invert_y: bool, + /// Scaling factor. Default 1. + mag: f64, + /// Rotation (ccw). Default 0. + angle_deg: f64, + /// Position (1 pair only) + xy: Vec, + /// Text content + string: Vec, + /// Properties for the element. + properties: HashMap::> +} diff --git a/src/lib.rs b/src/lib.rs index cf8365c..0f8c365 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -48,39 +48,39 @@ macro_rules! 
impl_i32be { #[no_mangle] -pub extern "C" fn f64_to_i16(arr: *mut f64, size: usize) -> f64 { impl_i16be!(f64, arr, size) } +pub unsafe extern "C" fn f64_to_i16(arr: *mut f64, size: usize) -> f64 { impl_i16be!(f64, arr, size) } #[no_mangle] -pub extern "C" fn f64_to_i32(arr: *mut f64, size: usize) -> f64 { impl_i32be!(f64, arr, size) } +pub unsafe extern "C" fn f64_to_i32(arr: *mut f64, size: usize) -> f64 { impl_i32be!(f64, arr, size) } #[no_mangle] -pub extern "C" fn f32_to_i16(arr: *mut f32, size: usize) -> f32 { impl_i16be!(f32, arr, size) } +pub unsafe extern "C" fn f32_to_i16(arr: *mut f32, size: usize) -> f32 { impl_i16be!(f32, arr, size) } #[no_mangle] -pub extern "C" fn f32_to_i32(arr: *mut f32, size: usize) -> f32 { impl_i32be!(f32, arr, size) } +pub unsafe extern "C" fn f32_to_i32(arr: *mut f32, size: usize) -> f32 { impl_i32be!(f32, arr, size) } #[no_mangle] -pub extern "C" fn u64_to_i16(arr: *mut u64, size: usize) -> u64 { impl_i16be!(u64, arr, size) } +pub unsafe extern "C" fn u64_to_i16(arr: *mut u64, size: usize) -> u64 { impl_i16be!(u64, arr, size) } #[no_mangle] -pub extern "C" fn u64_to_i32(arr: *mut u64, size: usize) -> u64 { impl_i32be!(u64, arr, size) } +pub unsafe extern "C" fn u64_to_i32(arr: *mut u64, size: usize) -> u64 { impl_i32be!(u64, arr, size) } #[no_mangle] -pub extern "C" fn i64_to_i16(arr: *mut i64, size: usize) -> i64 { impl_i16be!(i64, arr, size) } +pub unsafe extern "C" fn i64_to_i16(arr: *mut i64, size: usize) -> i64 { impl_i16be!(i64, arr, size) } #[no_mangle] -pub extern "C" fn i64_to_i32(arr: *mut i64, size: usize) -> i64 { impl_i32be!(i64, arr, size) } +pub unsafe extern "C" fn i64_to_i32(arr: *mut i64, size: usize) -> i64 { impl_i32be!(i64, arr, size) } #[no_mangle] -pub extern "C" fn u32_to_i16(arr: *mut u32, size: usize) -> u32 { impl_i16be!(u32, arr, size) } +pub unsafe extern "C" fn u32_to_i16(arr: *mut u32, size: usize) -> u32 { impl_i16be!(u32, arr, size) } #[no_mangle] -pub extern "C" fn u32_to_i32(arr: *mut u32, 
size: usize) -> u32 { impl_i32be!(u32, arr, size) } +pub unsafe extern "C" fn u32_to_i32(arr: *mut u32, size: usize) -> u32 { impl_i32be!(u32, arr, size) } #[no_mangle] -pub extern "C" fn i32_to_i16(arr: *mut i32, size: usize) -> i32 { impl_i16be!(i32, arr, size) } +pub unsafe extern "C" fn i32_to_i16(arr: *mut i32, size: usize) -> i32 { impl_i16be!(i32, arr, size) } #[no_mangle] -pub extern "C" fn i32_to_i32(arr: *mut i32, size: usize) -> i32 { impl_i32be!(i32, arr, size) } +pub unsafe extern "C" fn i32_to_i32(arr: *mut i32, size: usize) -> i32 { impl_i32be!(i32, arr, size) } #[no_mangle] -pub extern "C" fn u16_to_i16(arr: *mut u16, size: usize) -> u16 { impl_i16be!(u16, arr, size) } +pub unsafe extern "C" fn u16_to_i16(arr: *mut u16, size: usize) -> u16 { impl_i16be!(u16, arr, size) } #[no_mangle] -pub extern "C" fn i16_to_i16(arr: *mut i16, size: usize) -> i16 { impl_i16be!(i16, arr, size) } +pub unsafe extern "C" fn i16_to_i16(arr: *mut i16, size: usize) -> i16 { impl_i16be!(i16, arr, size) } diff --git a/src/library.rs b/src/library.rs index ee392ed..bad6229 100644 --- a/src/library.rs +++ b/src/library.rs @@ -9,11 +9,18 @@ pub use crate::record; pub use crate::record::{RecordHeader, Record}; pub use crate::records; pub use crate::elements; -//pub use crate::elements::{Element}; +pub use crate::elements::{read_elements, DListBuilder}; pub use crate::basic::{IResult, OResult, take_bytes, fail}; +use std::string::String; +use std::collections::HashMap; +use std::sync::Arc; -const DEFAULT_DATE: [i16; 6] = [1900, 0, 0, 0, 0, 0]; +use arrow::datatypes::{DataType, Field, Fields}; +use arrow::array::{ + StructBuilder, StringBuilder, UInt64Builder, UInt32Builder, Int16Builder, Float64Builder, + FixedSizeListBuilder, StructArray, +}; /// @@ -44,8 +51,8 @@ impl FileHeader { mod_time: [0, 1, 1, 0, 0, 0], acc_time: [0, 1, 1, 0, 0, 0], name: name.to_owned(), - user_units_per_db_unit: user_units_per_db_unit, - meters_per_db_unit: meters_per_db_unit, + 
user_units_per_db_unit, + meters_per_db_unit, } } @@ -64,9 +71,9 @@ impl FileHeader { let (input, (uu, dbu)) = records::UNITS::skip_and_read(input)?; Ok((input, FileHeader{ - mod_time: mod_time, - acc_time: acc_time, - name: name, + mod_time, + acc_time, + name, user_units_per_db_unit: uu, meters_per_db_unit: dbu, })) @@ -90,6 +97,199 @@ impl FileHeader { } } + +pub fn read_library(input: &[u8]) -> IResult { + let input_size = input.len(); + + let property_t = DataType::Struct(Fields::from(vec![ + Field::new("key", DataType::Int16, false), + Field::new("value", DataType::Utf8, false), + ])); + + let property_list_t = DataType::List(Arc::new( + Field::new_list_field(property_t, false) + )); + + + let repetition_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("x0", DataType::Int32, false), + Field::new("y0", DataType::Int32, false), + Field::new("x1", DataType::Int32, false), + Field::new("y1", DataType::Int32, false), + Field::new("count0", DataType::Int16, false), + Field::new("count1", DataType::Int16, false), + ])); + + + let ref_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("target", DataType::UInt32, false), + Field::new("invert_y", DataType::Boolean, true), + Field::new("mag", DataType::Float64, true), + Field::new("angle_deg", DataType::Float64, true), + Field::new("x", DataType::Int32, false), + Field::new("y", DataType::Int32, false), + Field::new("repetition", repetition_struct_t, true), + Field::new("properties", property_list_t.clone(), true), + ])); + + + let text_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("layer", DataType::UInt16, false), + Field::new("dtype", DataType::UInt16, false), + Field::new("presentation_horiz", DataType::UInt8, true), + Field::new("presentation_vert", DataType::UInt8, true), + Field::new("presentation_font", DataType::UInt8, true), + Field::new("path_type", DataType::Int16, true), + Field::new("width", DataType::Int32, true), + Field::new("invert_y", DataType::Boolean, true), + 
Field::new("mag", DataType::Float64, true), + Field::new("angle_deg", DataType::Float64, true), + Field::new("x", DataType::Int32, false), + Field::new("y", DataType::Int32, false), + Field::new("string", DataType::Utf8, false), + Field::new("properties", property_list_t.clone(), true), + ])); + + + let coords_t = DataType::List(Arc::new( + Field::new_list_field(DataType::Int32, false) + )); + + let boundary_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("layer", DataType::UInt16, false), + Field::new("dtype", DataType::UInt16, false), + Field::new("xy", coords_t.clone(), false), + Field::new("properties", property_list_t.clone(), true), + ])); + + let path_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("layer", DataType::UInt16, false), + Field::new("dtype", DataType::UInt16, false), + Field::new("path_type", DataType::Int16, false), + Field::new("extension_start", DataType::Int32, true), + Field::new("extension_end", DataType::Int32, true), + Field::new("width", DataType::Int32, false), + Field::new("xy", coords_t.clone(), false), + Field::new("properties", property_list_t.clone(), true), + ])); + + let boxnode_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("layer", DataType::UInt16, false), + Field::new("dtype", DataType::UInt16, false), + Field::new("xy", coords_t.clone(), false), + Field::new("properties", property_list_t.clone(), true), + ])); + + + let ref_list_t = DataType::List(Arc::new( + Field::new_list_field(ref_struct_t, false) + )); + + let text_list_t = DataType::List(Arc::new( + Field::new_list_field(text_struct_t, false) + )); + + let boundary_list_t = DataType::List(Arc::new( + Field::new_list_field(boundary_struct_t, false) + )); + + let path_list_t = DataType::List(Arc::new( + Field::new_list_field(path_struct_t, false) + )); + + let boxnode_list_t = DataType::List(Arc::new( + Field::new_list_field(boxnode_struct_t, false) + )); + + let name_list_t = DataType::List(Arc::new( + 
Field::new_list_field(DataType::Utf8, false) + )); + + let time_t = DataType::FixedSizeList(Arc::new( + Field::new_list_field(DataType::Int16, false), + ), + 6, + ); + + let cell_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("id", DataType::UInt32, false), + Field::new("file_offset", DataType::UInt64, false), + Field::new("refs", ref_list_t, false), + Field::new("boundaries", boundary_list_t, false), + Field::new("paths", path_list_t, false), + Field::new("nodes", boxnode_list_t.clone(), true), + Field::new("boxes", boxnode_list_t.clone(), true), + Field::new("texts", text_list_t, false), + ])); + + let mut lib_builder = StructBuilder::from_fields(vec![ + Field::new("meters_per_db_unit", DataType::Float64, false), + Field::new("user_units_per_db_unit", DataType::Float64, false), + Field::new("lib_name", DataType::Utf8, false), + Field::new("mod_time", time_t.clone(), false), + Field::new("acc_time", time_t.clone(), false), + Field::new("cell_names", name_list_t, false), + Field::new("cells", cell_struct_t, false), + ], + 0, + ); + + let (input, header) = FileHeader::read(input)?; + let dbu_builder = lib_builder.field_builder::(0).unwrap(); + dbu_builder.append_value(header.meters_per_db_unit); + let uu_builder = lib_builder.field_builder::(1).unwrap(); + uu_builder.append_value(header.user_units_per_db_unit); + let libname_builder = lib_builder.field_builder::(2).unwrap(); + libname_builder.append_value(String::from_utf8(header.name).unwrap()); + let mt_builder = lib_builder.field_builder::>(3).unwrap(); + mt_builder.values().append_values(&header.mod_time, &[true; 6]); + let at_builder = lib_builder.field_builder::>(4).unwrap(); + at_builder.values().append_values(&header.acc_time, &[true; 6]); + + + + let cells_builder = lib_builder.field_builder::(5).unwrap(); + + + let mut names = HashMap::::new(); + + let (mut input, mut header) = RecordHeader::read(input)?; + while header.tag != records::RTAG_ENDLIB { + (input, _) = take_bytes(input, 
header.data_size)?; + if header.tag == records::RTAG_BGNSTR { + let name_bytes; + (input, name_bytes) = records::STRNAME::read(input)?; + let name = String::from_utf8(name_bytes).unwrap(); + + let next_id = names.len(); + let id = names.entry(name).or_insert(next_id.try_into().unwrap()); + let position = input_size - input.len(); + + let cell_builder = cells_builder.values().as_any_mut().downcast_mut::().unwrap(); + let id_builder = cell_builder.field_builder::(0).unwrap(); + id_builder.append_value(*id); + + let offset_builder = cell_builder.field_builder::(1).unwrap(); + offset_builder.append_value(position.try_into().unwrap()); + + (input, _) = read_elements(input, cell_builder, &mut names)?; + + cells_builder.append(true); + } + (input, header) = RecordHeader::read(input)?; + } + + let mut ids: HashMap = names.into_iter().map(|(kk, vv)| (vv, kk)).collect(); + let names_builder = lib_builder.field_builder::(6).unwrap(); + for id in 0..ids.len() { + names_builder.append_value(ids.remove(&id.try_into().unwrap()).unwrap()); + } + + lib_builder.append(true); + let lib = lib_builder.finish(); + Ok((input, lib)) +} + /* /// /// Scan through a GDS file, building a table of diff --git a/src/misc.py b/src/misc.py new file mode 100644 index 0000000..ab4c9bf --- /dev/null +++ b/src/misc.py @@ -0,0 +1,40 @@ +''' +https://github.com/apache/arrow/blob/main/python/pyarrow/tests/test_cffi.py +''' +# -*- coding: utf-8 -*- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import contextlib +import ctypes +import pyarrow as pa +from pyarrow.cffi import ffi + + c_schema = ffi.new('struct ArrowSchema*') + ptr_schema = int(ffi.cast('uintptr_t', c_schema)) + c_array = ffi.new('struct ArrowArray*') + ptr_array = int(ffi.cast('uintptr_t', c_array)) + + # pyarrow.Array._import_from_c(ptr_array, pa.list_(pa.int32())) + + # import gc + # gc.collect() # Make sure no Arrow data dangles in a ref cycle + # pyarrow.Array._export_from_c(arr, ptr_array, ptr_schema) + + arr_new = pyarrow.Array._import_from_c(ptr_array, ptr_schema) + + diff --git a/src/record.rs b/src/record.rs index da2e12b..8d106c9 100644 --- a/src/record.rs +++ b/src/record.rs @@ -23,10 +23,10 @@ pub struct RecordHeader { } impl RecordHeader { - pub fn read<'a>(input: &[u8]) -> IResult { + pub fn read(input: &[u8]) -> IResult { let (input, size) = parse_u16(input)?; let (input, tag) = parse_u16(input)?; - Ok((input, RecordHeader{tag:tag, data_size:size - 4})) + Ok((input, RecordHeader{tag, data_size:size - 4})) } pub fn pack_into(&self) -> [u8; 4] { @@ -49,7 +49,7 @@ pub trait RecordData { type InData : ?Sized; type ByteData : AsRef<[u8]>; - fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>>; + fn read(input: &[u8], size: u16) -> IResult>; fn pack_into(buf: &mut [u8], data: &Self::InData); //fn size(data: &Self::BareData<'_>) -> u16; fn pack(data: &Self::InData) -> Self::ByteData; @@ -76,7 +76,7 @@ pub trait Record { } fn write_header(ww: &mut W, data_size: u16) -> OResult { - RecordHeader{tag: Self::tag(), data_size: data_size}.write(ww) + 
RecordHeader{tag: Self::tag(), data_size}.write(ww) } fn read_data(input: &[u8], size: u16) -> IResult> { @@ -127,7 +127,7 @@ pub trait Record { } } - fn read<'a>(input: &'a [u8]) -> IResult<'a, RData::BareData<'a>> { + fn read(input: &[u8]) -> IResult> { let (input, size) = Self::expect_header(input)?; Self::check_size(size).unwrap(); let (input, data) = Self::read_data(input, size)?; @@ -151,7 +151,7 @@ impl RecordData for BitArray { type InData = [bool; 16]; type ByteData = [u8; 2]; - fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { + fn read(input: &[u8], size: u16) -> IResult> { assert!(size == 2); parse_bitarray(input) } @@ -174,7 +174,7 @@ impl RecordData for Int2 { type InData = i16; type ByteData = [u8; 2]; - fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { + fn read(input: &[u8], size: u16) -> IResult> { assert!(size == 2); parse_int2(input) } @@ -196,7 +196,7 @@ impl RecordData for Int4 { type InData = i32; type ByteData = [u8; 4]; - fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { + fn read(input: &[u8], size: u16) -> IResult> { assert!(size == 4); parse_int4(input) } @@ -219,15 +219,15 @@ impl RecordData for Int2Array { type InData = [i16]; type ByteData = Vec; - fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { + fn read(input: &[u8], size: u16) -> IResult> { assert!(size % 2 == 0, "Record must contain an integer quantity of integers"); //let mut input = input; let (input, bytes) = take_bytes(input, size)?; - Ok((input, Int2ArrayReader{bytes: bytes})) + Ok((input, Int2ArrayReader{bytes})) } fn pack_into(buf: &mut [u8], data: &Self::InData) { - BigEndian::write_i16_into(&data, buf) + BigEndian::write_i16_into(data, buf) } fn pack(data: &Self::InData) -> Self::ByteData { @@ -260,15 +260,15 @@ impl RecordData for Int4Array { type InData = [i32]; type ByteData = Vec; - fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { + 
fn read(input: &[u8], size: u16) -> IResult> { assert!(size % 4 == 0, "Record must contain an integer quantity of integers"); //let mut input = input; let (input, bytes) = take_bytes(input, size)?; - Ok((input, Int4ArrayReader{bytes: bytes})) + Ok((input, Int4ArrayReader{bytes})) } fn pack_into(buf: &mut [u8], data: &Self::InData) { - BigEndian::write_i32_into(&data, buf) + BigEndian::write_i32_into(data, buf) } fn pack(data: &Self::InData) -> Self::ByteData { @@ -301,13 +301,13 @@ impl RecordData for Real8 { type InData = f64; type ByteData = [u8; 8]; - fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { + fn read(input: &[u8], size: u16) -> IResult> { assert!(size == 8); parse_real8(input) } fn pack_into(buf: &mut [u8], data: &Self::InData) { - pack_real8(buf, *data).expect(&format!("Float {0} too big for Real8", data)) + pack_real8(buf, *data).unwrap_or_else(|_| panic!("Float {0} too big for Real8", data)) } fn pack(data: &Self::InData) -> Self::ByteData { @@ -323,7 +323,7 @@ impl RecordData for Real8Pair { type InData = (f64, f64); type ByteData = [u8; 2 * 8]; - fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { + fn read(input: &[u8], size: u16) -> IResult> { assert!(size == 2 * 8); let (input, data0) = parse_real8(input)?; let (input, data1) = parse_real8(input)?; @@ -331,8 +331,8 @@ impl RecordData for Real8Pair { } fn pack_into(buf: &mut [u8], data: &Self::InData) { - pack_real8(&mut buf[8 * 0..], data.0).expect(&format!("Float.0 {0} too big for Real8", data.0)); - pack_real8(&mut buf[8 * 1..], data.1).expect(&format!("Float.1 {0} too big for Real8", data.1)); + pack_real8(&mut buf[8 * 0..], data.0).unwrap_or_else(|_| panic!("Float.0 {0} too big for Real8", data.0)); + pack_real8(&mut buf[8 * 1..], data.1).unwrap_or_else(|_| panic!("Float.1 {0} too big for Real8", data.1)); } fn pack(data: &Self::InData) -> Self::ByteData { @@ -353,7 +353,7 @@ impl RecordData for ASCII { type InData = [u8]; type ByteData = 
Vec; - fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { + fn read(input: &[u8], size: u16) -> IResult> { parse_ascii(input, size) } @@ -375,7 +375,7 @@ impl RecordData for DateTimePair { type InData = [[i16; 6]; 2]; type ByteData = [u8; 2 * 6 * 2]; - fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { + fn read(input: &[u8], size: u16) -> IResult> { assert!(size == 2 * 6 * 2); let (input, data0) = parse_datetime(input)?; let (input, data1) = parse_datetime(input)?; @@ -405,7 +405,7 @@ impl RecordData for Empty { type InData = (); type ByteData = [u8; 0]; - fn read<'a>(input: &'a [u8], size: u16) -> IResult<'a, Self::BareData<'a>> { + fn read(input: &[u8], size: u16) -> IResult> { assert!(size == 0); Ok((input, ())) } From e2ba1390c6cbc123c818534a41476ffeef5481f3 Mon Sep 17 00:00:00 2001 From: jan Date: Sat, 12 Apr 2025 12:06:36 -0700 Subject: [PATCH 21/31] first working attempt --- Cargo.toml | 2 +- src/elements.rs | 84 ++++++++++++++++++++++++++++++++++--------------- src/lib.rs | 35 +++++++++++++++++++++ src/library.rs | 58 +++++++++++++++++++--------------- src/misc.py | 1 - src/record.rs | 1 + src/records.rs | 4 +-- test.py | 35 +++++++++++++++++++++ 8 files changed, 165 insertions(+), 55 deletions(-) create mode 100644 test.py diff --git a/Cargo.toml b/Cargo.toml index c7deda4..aa0b4a8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,4 +12,4 @@ crate-type = ["cdylib", "rlib"] [dependencies] byteorder = "^1" -arrow = "^54" +arrow = {version = "^54", features = ["ffi"]} diff --git a/src/elements.rs b/src/elements.rs index 3e41115..ee6e7e8 100644 --- a/src/elements.rs +++ b/src/elements.rs @@ -20,35 +20,54 @@ use std::collections::HashMap; //use std::io::Write; use arrow::array::{ - StructBuilder, ListBuilder, StringBuilder, ArrayBuilder, Float64Builder, BooleanBuilder, - Int32Builder, Int16Builder, UInt32Builder, UInt8Builder, + StructBuilder, FixedSizeListBuilder, ListBuilder, StringBuilder, ArrayBuilder, 
BooleanBuilder, + Int32Builder, Int16Builder, UInt32Builder, UInt8Builder, Float64Builder, }; pub type DListBuilder = ListBuilder>; +pub type FListBuilder = FixedSizeListBuilder>; pub fn read_elements<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, names: &mut HashMap) -> IResult<'a, ()> { - let (mut input, mut header) = RecordHeader::read(input)?; + let mut input = input; + let (_, mut header) = RecordHeader::read(input)?; // don't consume tag while header.tag != records::RTAG_ENDSTR { - match header.tag { - records::RTAG_BOUNDARY => {(input, _) = read_boundary(input, cell_builder)?;}, - records::RTAG_PATH => {read_path(input, cell_builder)?;}, - records::RTAG_NODE => {read_boxnode(input, cell_builder, header.tag)?;}, - records::RTAG_BOX => {read_boxnode(input, cell_builder, header.tag)?;}, - records::RTAG_TEXT => {read_text(input, cell_builder)?;}, - records::RTAG_SREF => {read_ref(input, cell_builder, header.tag, names)?;}, - records::RTAG_AREF => {read_ref(input, cell_builder, header.tag, names)?;}, + (input, _) = match header.tag { + records::RTAG_SREF => read_ref(input, cell_builder, header.tag, names)?, + records::RTAG_AREF => read_ref(input, cell_builder, header.tag, names)?, + records::RTAG_BOUNDARY => read_boundary(input, cell_builder)?, + records::RTAG_PATH => read_path(input, cell_builder)?, + records::RTAG_NODE => read_boxnode(input, cell_builder, header.tag)?, + records::RTAG_BOX => read_boxnode(input, cell_builder, header.tag)?, + records::RTAG_TEXT => read_text(input, cell_builder)?, _ => { // don't care, skip - (input, _) = take_bytes(input, header.data_size)?; + let result = take_bytes(input, header.data_size + 4)?; + (result.0, ()) } - } - (input, header) = RecordHeader::read(input)?; + }; + (_, header) = RecordHeader::read(input)?; // don't consume tag } + (input, _) = take_bytes(input, 4_usize)?; // consume endstr tag + + let refs_builder = cell_builder.field_builder::(2).unwrap(); + refs_builder.append(true); + let boundaries_builder = 
cell_builder.field_builder::(3).unwrap(); + boundaries_builder.append(true); + let paths_builder = cell_builder.field_builder::(4).unwrap(); + paths_builder.append(true); + let nodes_builder = cell_builder.field_builder::(5).unwrap(); + nodes_builder.append(true); + let boxes_builder = cell_builder.field_builder::(6).unwrap(); + boxes_builder.append(true); + let texts_builder = cell_builder.field_builder::(7).unwrap(); + texts_builder.append(true); + Ok((input, ())) } + pub fn read_boundary<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResult<'a, ()> { let boundaries_builder = cell_builder.field_builder::(3).unwrap(); let boundary_builder = boundaries_builder.values().as_any_mut().downcast_mut::().unwrap(); @@ -75,7 +94,6 @@ pub fn read_boundary<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> I let (input, ()) = read_properties(input, props_builder)?; boundary_builder.append(true); - boundaries_builder.append(true); Ok((input, ())) } @@ -84,7 +102,6 @@ pub fn read_path<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu let paths_builder = cell_builder.field_builder::(4).unwrap(); let path_builder = paths_builder.values().as_any_mut().downcast_mut::().unwrap(); - let (input, _) = records::PATH::read(input)?; let (input, layer) = LAYER::skip_and_read(input)?; @@ -137,7 +154,7 @@ pub fn read_path<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu width_builder.append_option(width); let xys_builder = path_builder.field_builder::(6).unwrap(); - let (input, xy_iter) = XY::read(input)?; + let (input, xy_iter) = XY::read_data(input, header.data_size)?; for xy in xy_iter { let xy_builder = xys_builder.values().as_any_mut().downcast_mut::().unwrap(); xy_builder.append_value(xy); @@ -148,7 +165,6 @@ pub fn read_path<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu let (input, ()) = read_properties(input, props_builder)?; path_builder.append(true); - paths_builder.append(true); Ok((input, ())) } @@ -192,7 +208,6 
@@ pub fn read_boxnode<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag: let (input, ()) = read_properties(input, props_builder)?; boxnode_builder.append(true); - boxnodes_builder.append(true); Ok((input, ())) } @@ -200,7 +215,6 @@ pub fn read_text<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu let texts_builder = cell_builder.field_builder::(7).unwrap(); let text_builder = texts_builder.values().as_any_mut().downcast_mut::().unwrap(); - let mut path_type = None; let mut pres_hori = None; let mut pres_vert = None; @@ -277,7 +291,7 @@ pub fn read_text<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu let angle_builder = text_builder.field_builder::(9).unwrap(); angle_builder.append_option(angle_deg); - let (input, mut xy_iter) = XY::read(input)?; + let (input, mut xy_iter) = XY::read_data(input, header.data_size)?; let x_builder = text_builder.field_builder::(10).unwrap(); x_builder.append_value(xy_iter.next().unwrap()); let y_builder = text_builder.field_builder::(11).unwrap(); @@ -288,19 +302,24 @@ pub fn read_text<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu let string_builder = text_builder.field_builder::(12).unwrap(); string_builder.append_value(string); - let props_builder = text_builder.field_builder::(13).unwrap(); + let props_builder = text_builder.field_builder::(13).unwrap(); let (input, ()) = read_properties(input, props_builder)?; text_builder.append(true); - texts_builder.append(true); Ok((input, ())) } pub fn read_ref<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag: u16, names: &mut HashMap) -> IResult<'a, ()> { + let (input, _) = match tag { + records::RTAG_SREF => records::SREF::read(input)?, + records::RTAG_AREF => records::AREF::read(input)?, + _ => return fail(input, format!("Unexpected tag {:04x}", tag)), + }; + let is_aref = tag == records::RTAG_AREF; - let refs_builder = cell_builder.field_builder::(7).unwrap(); + let refs_builder = 
cell_builder.field_builder::(2).unwrap(); let ref_builder = refs_builder.values().as_any_mut().downcast_mut::().unwrap(); let mut invert_y = None; @@ -361,6 +380,7 @@ pub fn read_ref<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag: u16, y_builder.append_value(xy_iter.next().unwrap()); let rep_builder = ref_builder.field_builder::(6).unwrap(); + println!("ref, {is_aref:?}"); if is_aref { let x0_builder = rep_builder.field_builder::(0).unwrap(); x0_builder.append_value(xy_iter.next().unwrap()); @@ -380,6 +400,19 @@ pub fn read_ref<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag: u16, count1_builder.append_value(count1); }, } + } else { + let x0_builder = rep_builder.field_builder::(0).unwrap(); + x0_builder.append_null(); + let y0_builder = rep_builder.field_builder::(1).unwrap(); + y0_builder.append_null(); + let x1_builder = rep_builder.field_builder::(2).unwrap(); + x1_builder.append_null(); + let y1_builder = rep_builder.field_builder::(3).unwrap(); + y1_builder.append_null(); + let count0_builder = rep_builder.field_builder::(4).unwrap(); + count0_builder.append_null(); + let count1_builder = rep_builder.field_builder::(5).unwrap(); + count1_builder.append_null(); } rep_builder.append(is_aref); @@ -387,7 +420,6 @@ pub fn read_ref<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag: u16, let (input, ()) = read_properties(input, props_builder)?; ref_builder.append(true); - refs_builder.append(true); Ok((input, ())) } @@ -405,7 +437,6 @@ pub fn read_ref<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag: u16, /// propattr: -> propvalue mapping /// pub fn read_properties<'a>(input: &'a [u8], props_builder: &mut DListBuilder) -> IResult<'a, ()> { - let prop_builder = props_builder.values().as_any_mut().downcast_mut::().unwrap(); let (mut input, mut header) = RecordHeader::read(input)?; while header.tag != ENDEL::tag() { @@ -418,6 +449,7 @@ pub fn read_properties<'a>(input: &'a [u8], props_builder: &mut DListBuilder) -> let value = 
String::from_utf8(value_bytes).unwrap(); //assert!(!properties.contains_key(&key), "Duplicate property key: {}", key); + let prop_builder = props_builder.values().as_any_mut().downcast_mut::().unwrap(); let key_builder = prop_builder.field_builder::(0).unwrap(); key_builder.append_value(key); diff --git a/src/lib.rs b/src/lib.rs index 0f8c365..33b7598 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,9 +6,44 @@ pub mod records; pub mod elements; pub mod library; +use crate::library::read_library; use byteorder::{ByteOrder, BigEndian}; use std::mem::size_of; +use std::ffi::{CStr, OsStr, c_char}; +use std::os::unix::ffi::OsStrExt; +use std::str; +use std::fs; +use std::path::Path; + +use arrow::ffi::{to_ffi, FFI_ArrowArray, FFI_ArrowSchema}; +use arrow::array::Array; + + +#[no_mangle] +pub unsafe extern "C" fn read_path( + cpath: *const c_char, + arr: *mut FFI_ArrowArray, + schema: *mut FFI_ArrowSchema, + ) { + let cstr = unsafe { CStr::from_ptr(cpath) }; + let path: &Path; + if cfg!(unix) { + let osstr = OsStr::from_bytes(cstr.to_bytes()); + path = osstr.as_ref(); + } else if cfg!(windows) { + let ustr = str::from_utf8(cstr.to_bytes()).expect("Non-UTF8 paths are not supported"); + path = ustr.as_ref(); + } else { + panic!("Unsupported OS"); + } + + let input = fs::read(path).expect("File read failed"); + let (_input, struct_arr) = read_library(&input).expect("Read failed"); + let (mut arr_v, mut schema_v) = to_ffi(&struct_arr.to_data()).unwrap(); + *arr = arr_v; + *schema = schema_v; +} macro_rules! 
impl_i16be { diff --git a/src/library.rs b/src/library.rs index bad6229..c2c4e13 100644 --- a/src/library.rs +++ b/src/library.rs @@ -9,7 +9,7 @@ pub use crate::record; pub use crate::record::{RecordHeader, Record}; pub use crate::records; pub use crate::elements; -pub use crate::elements::{read_elements, DListBuilder}; +pub use crate::elements::{read_elements, DListBuilder, FListBuilder}; pub use crate::basic::{IResult, OResult, take_bytes, fail}; use std::string::String; @@ -19,7 +19,7 @@ use std::sync::Arc; use arrow::datatypes::{DataType, Field, Fields}; use arrow::array::{ StructBuilder, StringBuilder, UInt64Builder, UInt32Builder, Int16Builder, Float64Builder, - FixedSizeListBuilder, StructArray, + StructArray, }; @@ -134,8 +134,8 @@ pub fn read_library(input: &[u8]) -> IResult { let text_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("layer", DataType::UInt16, false), - Field::new("dtype", DataType::UInt16, false), + Field::new("layer", DataType::Int16, false), + Field::new("dtype", DataType::Int16, false), Field::new("presentation_horiz", DataType::UInt8, true), Field::new("presentation_vert", DataType::UInt8, true), Field::new("presentation_font", DataType::UInt8, true), @@ -156,16 +156,16 @@ pub fn read_library(input: &[u8]) -> IResult { )); let boundary_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("layer", DataType::UInt16, false), - Field::new("dtype", DataType::UInt16, false), + Field::new("layer", DataType::Int16, false), + Field::new("dtype", DataType::Int16, false), Field::new("xy", coords_t.clone(), false), Field::new("properties", property_list_t.clone(), true), ])); let path_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("layer", DataType::UInt16, false), - Field::new("dtype", DataType::UInt16, false), - Field::new("path_type", DataType::Int16, false), + Field::new("layer", DataType::Int16, false), + Field::new("dtype", DataType::Int16, false), + Field::new("path_type", DataType::Int16, true), 
Field::new("extension_start", DataType::Int32, true), Field::new("extension_end", DataType::Int32, true), Field::new("width", DataType::Int32, false), @@ -174,8 +174,8 @@ pub fn read_library(input: &[u8]) -> IResult { ])); let boxnode_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("layer", DataType::UInt16, false), - Field::new("dtype", DataType::UInt16, false), + Field::new("layer", DataType::Int16, false), + Field::new("dtype", DataType::Int16, false), Field::new("xy", coords_t.clone(), false), Field::new("properties", property_list_t.clone(), true), ])); @@ -222,6 +222,10 @@ pub fn read_library(input: &[u8]) -> IResult { Field::new("texts", text_list_t, false), ])); + let cells_list_t = DataType::List(Arc::new( + Field::new_list_field(cell_struct_t, false) + )); + let mut lib_builder = StructBuilder::from_fields(vec![ Field::new("meters_per_db_unit", DataType::Float64, false), Field::new("user_units_per_db_unit", DataType::Float64, false), @@ -229,7 +233,7 @@ pub fn read_library(input: &[u8]) -> IResult { Field::new("mod_time", time_t.clone(), false), Field::new("acc_time", time_t.clone(), false), Field::new("cell_names", name_list_t, false), - Field::new("cells", cell_struct_t, false), + Field::new("cells", cells_list_t, false), ], 0, ); @@ -241,17 +245,17 @@ pub fn read_library(input: &[u8]) -> IResult { uu_builder.append_value(header.user_units_per_db_unit); let libname_builder = lib_builder.field_builder::(2).unwrap(); libname_builder.append_value(String::from_utf8(header.name).unwrap()); - let mt_builder = lib_builder.field_builder::>(3).unwrap(); - mt_builder.values().append_values(&header.mod_time, &[true; 6]); - let at_builder = lib_builder.field_builder::>(4).unwrap(); - at_builder.values().append_values(&header.acc_time, &[true; 6]); - - - - let cells_builder = lib_builder.field_builder::(5).unwrap(); - + let mtl_builder = lib_builder.field_builder::(3).unwrap(); + let mt_builder = 
mtl_builder.values().as_any_mut().downcast_mut::().unwrap(); + mt_builder.append_values(&header.mod_time, &[true; 6]); + mtl_builder.append(true); + let atl_builder = lib_builder.field_builder::(4).unwrap(); + let at_builder = atl_builder.values().as_any_mut().downcast_mut::().unwrap(); + at_builder.append_values(&header.acc_time, &[true; 6]); + atl_builder.append(true); let mut names = HashMap::::new(); + let cells_builder = lib_builder.field_builder::(6).unwrap(); let (mut input, mut header) = RecordHeader::read(input)?; while header.tag != records::RTAG_ENDLIB { @@ -260,6 +264,7 @@ pub fn read_library(input: &[u8]) -> IResult { let name_bytes; (input, name_bytes) = records::STRNAME::read(input)?; let name = String::from_utf8(name_bytes).unwrap(); + println!("{name}"); let next_id = names.len(); let id = names.entry(name).or_insert(next_id.try_into().unwrap()); @@ -274,16 +279,19 @@ pub fn read_library(input: &[u8]) -> IResult { (input, _) = read_elements(input, cell_builder, &mut names)?; - cells_builder.append(true); - } + cell_builder.append(true); + } (input, header) = RecordHeader::read(input)?; } + cells_builder.append(true); let mut ids: HashMap = names.into_iter().map(|(kk, vv)| (vv, kk)).collect(); - let names_builder = lib_builder.field_builder::(6).unwrap(); + let names_builder = lib_builder.field_builder::(5).unwrap(); + let name_builder = names_builder.values().as_any_mut().downcast_mut::().unwrap(); for id in 0..ids.len() { - names_builder.append_value(ids.remove(&id.try_into().unwrap()).unwrap()); + name_builder.append_value(ids.remove(&id.try_into().unwrap()).unwrap()); } + names_builder.append(true); lib_builder.append(true); let lib = lib_builder.finish(); diff --git a/src/misc.py b/src/misc.py index ab4c9bf..1e507c7 100644 --- a/src/misc.py +++ b/src/misc.py @@ -19,7 +19,6 @@ https://github.com/apache/arrow/blob/main/python/pyarrow/tests/test_cffi.py # specific language governing permissions and limitations # under the License. 
-import contextlib import ctypes import pyarrow as pa from pyarrow.cffi import ffi diff --git a/src/record.rs b/src/record.rs index 8d106c9..3226dbb 100644 --- a/src/record.rs +++ b/src/record.rs @@ -17,6 +17,7 @@ use crate::records; #[repr(C)] +#[derive(Debug, Clone)] pub struct RecordHeader { pub tag: u16, pub data_size: u16, diff --git a/src/records.rs b/src/records.rs index 4fe21fa..5a7e2cc 100644 --- a/src/records.rs +++ b/src/records.rs @@ -112,7 +112,7 @@ impl Record for HEADER { pub struct BGNLIB; impl Record for BGNLIB { fn tag() -> u16 { RTAG_BGNLIB } - fn expected_size() -> Option { Some(2 * 6) } + fn expected_size() -> Option { Some(2 * 2 * 6) } } pub struct LIBNAME; @@ -143,7 +143,7 @@ impl Record for BGNSTR { pub struct STRNAME; impl Record for STRNAME { fn tag() -> u16 { RTAG_STRNAME } - fn expected_size() -> Option { Some(2 * 6) } + fn expected_size() -> Option { None } } pub struct ENDSTR; diff --git a/test.py b/test.py new file mode 100644 index 0000000..998dff1 --- /dev/null +++ b/test.py @@ -0,0 +1,35 @@ +import ctypes +import pyarrow +from pyarrow.cffi import ffi + +#c_schema = ffi.new('struct ArrowSchema*') +#c_array = ffi.new('struct ArrowArray*') +#ptr_schema = int(ffi.cast('uintptr_t', c_schema)) +#ptr_array = int(ffi.cast('uintptr_t', c_array)) + + +path = '/home/jan/projects/masque/test.gds' + +#clib = ctypes.CDLL('./libklamath_rs_ext.so') +clib = ffi.dlopen('./libklamath_rs_ext.so') + + +ret_ptr_array = ffi.new('struct ArrowArray[]', 1) +ret_ptr_schema = ffi.new('struct ArrowSchema[]', 1) +ffi.cdef('void read_path(char* path, struct ArrowArray* array, struct ArrowSchema* schema);') +print(f'{ret_ptr_array[0]=}, {ret_ptr_schema[0]=}') + +clib.read_path(path.encode(), ret_ptr_array, ret_ptr_schema) + +ptr_schema = int(ffi.cast('uintptr_t', ret_ptr_schema)) +ptr_array = int(ffi.cast('uintptr_t', ret_ptr_array)) + + +print(f'{ret_ptr_array[0]=}, {ret_ptr_schema[0]=}') +print(f'python {ptr_array=:x} {ptr_schema=:x}') 
+#print(f'{ret_ptr_array[0].buffers=} {ret_ptr_schema[0][0]=}') + +arr_new = pyarrow.Array._import_from_c(ptr_array, ptr_schema) + + + From 1b3a6a45ce35daf4c9c3c5a9fa769e76c6610242 Mon Sep 17 00:00:00 2001 From: jan Date: Sat, 12 Apr 2025 12:51:26 -0700 Subject: [PATCH 22/31] minor cleanup --- src/lib.rs | 4 +--- test.py | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 33b7598..78e359b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -40,9 +40,7 @@ pub unsafe extern "C" fn read_path( let input = fs::read(path).expect("File read failed"); let (_input, struct_arr) = read_library(&input).expect("Read failed"); - let (mut arr_v, mut schema_v) = to_ffi(&struct_arr.to_data()).unwrap(); - *arr = arr_v; - *schema = schema_v; + (*arr, *schema) = to_ffi(&struct_arr.to_data()).unwrap(); } diff --git a/test.py b/test.py index 998dff1..788df2e 100644 --- a/test.py +++ b/test.py @@ -10,8 +10,8 @@ from pyarrow.cffi import ffi path = '/home/jan/projects/masque/test.gds' -#clib = ctypes.CDLL('./libklamath_rs_ext.so') -clib = ffi.dlopen('./libklamath_rs_ext.so') +#clib = ctypes.CDLL('target/debug/libklamath_rs_ext.so') +clib = ffi.dlopen('target/debug/libklamath_rs_ext.so') ret_ptr_array = ffi.new('struct ArrowArray[]', 1) From 9c98ee2b97eb03d32bf6936b424041c0c96799d6 Mon Sep 17 00:00:00 2001 From: jan Date: Sat, 12 Apr 2025 23:02:08 -0700 Subject: [PATCH 23/31] return masque-style (dx, dy) vectors for AREF --- src/elements.rs | 46 +++++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/src/elements.rs b/src/elements.rs index ee6e7e8..db5eaa1 100644 --- a/src/elements.rs +++ b/src/elements.rs @@ -374,32 +374,40 @@ pub fn read_ref<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag: u16, let (input, mut xy_iter) = XY::read_data(input, header.data_size)?; + let x = xy_iter.next().unwrap(); + let y = xy_iter.next().unwrap(); let x_builder = 
ref_builder.field_builder::(4).unwrap(); - x_builder.append_value(xy_iter.next().unwrap()); + x_builder.append_value(x); let y_builder = ref_builder.field_builder::(5).unwrap(); - y_builder.append_value(xy_iter.next().unwrap()); + y_builder.append_value(y); let rep_builder = ref_builder.field_builder::(6).unwrap(); println!("ref, {is_aref:?}"); if is_aref { - let x0_builder = rep_builder.field_builder::(0).unwrap(); - x0_builder.append_value(xy_iter.next().unwrap()); - let y0_builder = rep_builder.field_builder::(1).unwrap(); - y0_builder.append_value(xy_iter.next().unwrap()); - let x1_builder = rep_builder.field_builder::(2).unwrap(); - x1_builder.append_value(xy_iter.next().unwrap()); - let y1_builder = rep_builder.field_builder::(3).unwrap(); - y1_builder.append_value(xy_iter.next().unwrap()); - - match colrow { - None => return fail(input, "AREF without COLROW before XY".to_string()), - Some((count0, count1)) => { - let count0_builder = rep_builder.field_builder::(4).unwrap(); - count0_builder.append_value(count0); - let count1_builder = rep_builder.field_builder::(5).unwrap(); - count1_builder.append_value(count1); - }, + if colrow.is_none() { + return fail(input, "AREF without COLROW before XY".to_string()) } + let (count0, count1) = colrow.unwrap(); + + let x0a = (xy_iter.next().unwrap() - x) / (count0 as i32); + let y0a = (xy_iter.next().unwrap() - y) / (count0 as i32); + let x1a = (xy_iter.next().unwrap() - x) / (count1 as i32); + let y1a = (xy_iter.next().unwrap() - y) / (count1 as i32); + + let x0_builder = rep_builder.field_builder::(0).unwrap(); + x0_builder.append_value(x0a); + let y0_builder = rep_builder.field_builder::(1).unwrap(); + y0_builder.append_value(y0a); + let x1_builder = rep_builder.field_builder::(2).unwrap(); + x1_builder.append_value(x1a); + let y1_builder = rep_builder.field_builder::(3).unwrap(); + y1_builder.append_value(y1a); + + let count0_builder = rep_builder.field_builder::(4).unwrap(); + count0_builder.append_value(count0); + 
let count1_builder = rep_builder.field_builder::(5).unwrap(); + count1_builder.append_value(count1); + } else { let x0_builder = rep_builder.field_builder::(0).unwrap(); x0_builder.append_null(); From 259df49a221d33cf8f9cb94904560d17f39e2612 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Sun, 20 Apr 2025 23:21:54 -0700 Subject: [PATCH 24/31] use global dictionary approach for layers maybe there's a way to do this with arrow dictionaries? --- src/elements.rs | 136 ++++++++++++++++++++++++++++++------------------ src/library.rs | 36 ++++++++----- 2 files changed, 106 insertions(+), 66 deletions(-) diff --git a/src/elements.rs b/src/elements.rs index db5eaa1..60aeeb2 100644 --- a/src/elements.rs +++ b/src/elements.rs @@ -29,18 +29,38 @@ pub type DListBuilder = ListBuilder>; pub type FListBuilder = FixedSizeListBuilder>; -pub fn read_elements<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, names: &mut HashMap) -> IResult<'a, ()> { +fn insert_layer( + layer: i16, + dtype: i16, + layers: &mut HashMap, + struct_builder: &mut StructBuilder, + field_index: usize, + ) { + let layer32 = ((layer as u16 as u32) << 16) | (dtype as u16 as u32); + let next_id = layers.len(); + let id = layers.entry(layer32).or_insert(next_id.try_into().unwrap()); + let layer_builder = struct_builder.field_builder::(field_index).unwrap(); + layer_builder.append_value(*id); +} + + +pub fn read_elements<'a>( + input: &'a [u8], + cell_builder: &mut StructBuilder, + names: &mut HashMap, + layers: &mut HashMap, + ) -> IResult<'a, ()> { let mut input = input; let (_, mut header) = RecordHeader::read(input)?; // don't consume tag while header.tag != records::RTAG_ENDSTR { (input, _) = match header.tag { records::RTAG_SREF => read_ref(input, cell_builder, header.tag, names)?, records::RTAG_AREF => read_ref(input, cell_builder, header.tag, names)?, - records::RTAG_BOUNDARY => read_boundary(input, cell_builder)?, - records::RTAG_PATH => read_path(input, cell_builder)?, - records::RTAG_NODE => 
read_boxnode(input, cell_builder, header.tag)?, - records::RTAG_BOX => read_boxnode(input, cell_builder, header.tag)?, - records::RTAG_TEXT => read_text(input, cell_builder)?, + records::RTAG_BOUNDARY => read_boundary(input, cell_builder, layers)?, + records::RTAG_PATH => read_path(input, cell_builder, layers)?, + records::RTAG_NODE => read_boxnode(input, cell_builder, header.tag, layers)?, + records::RTAG_BOX => read_boxnode(input, cell_builder, header.tag, layers)?, + records::RTAG_TEXT => read_text(input, cell_builder, layers)?, _ => { // don't care, skip let result = take_bytes(input, header.data_size + 4)?; @@ -68,21 +88,21 @@ pub fn read_elements<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, name } -pub fn read_boundary<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResult<'a, ()> { +pub fn read_boundary<'a>( + input: &'a [u8], + cell_builder: &mut StructBuilder, + layers: &mut HashMap, + ) -> IResult<'a, ()> { let boundaries_builder = cell_builder.field_builder::(3).unwrap(); let boundary_builder = boundaries_builder.values().as_any_mut().downcast_mut::().unwrap(); let (input, _) = records::BOUNDARY::read(input)?; let (input, layer) = LAYER::skip_and_read(input)?; - let layer_builder = boundary_builder.field_builder::(0).unwrap(); - layer_builder.append_value(layer); - let (input, dtype) = DATATYPE::read(input)?; - let dtype_builder = boundary_builder.field_builder::(1).unwrap(); - dtype_builder.append_value(dtype); + insert_layer(layer, dtype, layers, boundary_builder, 0); - let xys_builder = boundary_builder.field_builder::(2).unwrap(); + let xys_builder = boundary_builder.field_builder::(1).unwrap(); let xy_builder = xys_builder.values().as_any_mut().downcast_mut::().unwrap(); let (input, xy_iter) = XY::read(input)?; for xy in xy_iter { @@ -90,7 +110,7 @@ pub fn read_boundary<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> I } xys_builder.append(true); - let props_builder = boundary_builder.field_builder::(3).unwrap(); + let 
props_builder = boundary_builder.field_builder::(2).unwrap(); let (input, ()) = read_properties(input, props_builder)?; boundary_builder.append(true); @@ -98,7 +118,11 @@ pub fn read_boundary<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> I } -pub fn read_path<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResult<'a, ()> { +pub fn read_path<'a>( + input: &'a [u8], + cell_builder: &mut StructBuilder, + layers: &mut HashMap, + ) -> IResult<'a, ()> { let paths_builder = cell_builder.field_builder::(4).unwrap(); let path_builder = paths_builder.values().as_any_mut().downcast_mut::().unwrap(); @@ -106,10 +130,12 @@ pub fn read_path<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu let (input, layer) = LAYER::skip_and_read(input)?; let (input, dtype) = DATATYPE::read(input)?; - let layer_builder = path_builder.field_builder::(0).unwrap(); - layer_builder.append_value(layer); - let dtype_builder = path_builder.field_builder::(1).unwrap(); - dtype_builder.append_value(dtype); + insert_layer(layer, dtype, layers, path_builder, 0); + //let layer32 = (layer as u16 as u32) << 16) | (dtype as u16 as u32) + //let next_id = layers.len(); + //let id = layers.entry(layer32).or_insert(next_id.try_into().unwrap()); + //let layer_builder = path_builder.field_builder::(0).unwrap(); + //layer_builder.append_value((id); let mut path_type = None; let mut width = None; @@ -144,16 +170,16 @@ pub fn read_path<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu }; (input, header) = RecordHeader::read(input)?; } - let path_type_builder = path_builder.field_builder::(2).unwrap(); + let path_type_builder = path_builder.field_builder::(1).unwrap(); path_type_builder.append_option(path_type); - let ext0_builder = path_builder.field_builder::(3).unwrap(); + let ext0_builder = path_builder.field_builder::(2).unwrap(); ext0_builder.append_option(bgn_ext); - let ext1_builder = path_builder.field_builder::(4).unwrap(); + let ext1_builder = 
path_builder.field_builder::(3).unwrap(); ext1_builder.append_option(end_ext); - let width_builder = path_builder.field_builder::(5).unwrap(); + let width_builder = path_builder.field_builder::(4).unwrap(); width_builder.append_option(width); - let xys_builder = path_builder.field_builder::(6).unwrap(); + let xys_builder = path_builder.field_builder::(5).unwrap(); let (input, xy_iter) = XY::read_data(input, header.data_size)?; for xy in xy_iter { let xy_builder = xys_builder.values().as_any_mut().downcast_mut::().unwrap(); @@ -161,14 +187,19 @@ pub fn read_path<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu } xys_builder.append(true); - let props_builder = path_builder.field_builder::(7).unwrap(); + let props_builder = path_builder.field_builder::(6).unwrap(); let (input, ()) = read_properties(input, props_builder)?; path_builder.append(true); Ok((input, ())) } -pub fn read_boxnode<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag: u16) -> IResult<'a, ()> { +pub fn read_boxnode<'a>( + input: &'a [u8], + cell_builder: &mut StructBuilder, + tag: u16, + layers: &mut HashMap, + ) -> IResult<'a, ()> { let field_num = match tag { records::RTAG_NODE => 5, records::RTAG_BOX => 6, @@ -184,19 +215,15 @@ pub fn read_boxnode<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag: _ => return fail(input, format!("Unexpected tag {:04x}", tag)), }; - let layer_builder = boxnode_builder.field_builder::(0).unwrap(); let (input, layer) = LAYER::skip_and_read(input)?; - layer_builder.append_value(layer); - let (input, dtype) = match tag { records::RTAG_NODE => NODETYPE::read(input)?, records::RTAG_BOX => BOXTYPE::read(input)?, _ => return fail(input, format!("Unexpected tag {:04x}", tag)), }; - let dtype_builder = boxnode_builder.field_builder::(1).unwrap(); - dtype_builder.append_value(dtype); + insert_layer(layer, dtype, layers, boxnode_builder, 0); - let xys_builder = boxnode_builder.field_builder::(2).unwrap(); + let xys_builder = 
boxnode_builder.field_builder::(1).unwrap(); let xy_builder = xys_builder.values().as_any_mut().downcast_mut::().unwrap(); let (input, xy_iter) = XY::read(input)?; for xy in xy_iter { @@ -204,14 +231,18 @@ pub fn read_boxnode<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag: } xys_builder.append(true); - let props_builder = boxnode_builder.field_builder::(3).unwrap(); + let props_builder = boxnode_builder.field_builder::(2).unwrap(); let (input, ()) = read_properties(input, props_builder)?; boxnode_builder.append(true); Ok((input, ())) } -pub fn read_text<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResult<'a, ()> { +pub fn read_text<'a>( + input: &'a [u8], + cell_builder: &mut StructBuilder, + layers: &mut HashMap, + ) -> IResult<'a, ()> { let texts_builder = cell_builder.field_builder::(7).unwrap(); let text_builder = texts_builder.values().as_any_mut().downcast_mut::().unwrap(); @@ -225,12 +256,8 @@ pub fn read_text<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu let mut angle_deg = None; let (input, layer) = LAYER::skip_and_read(input)?; - let layer_builder = text_builder.field_builder::(0).unwrap(); - layer_builder.append_value(layer); - let (input, dtype) = TEXTTYPE::read(input)?; - let dtype_builder = text_builder.field_builder::(1).unwrap(); - dtype_builder.append_value(dtype); + insert_layer(layer, dtype, layers, text_builder, 0); let (mut input, mut header) = RecordHeader::read(input)?; while header.tag != records::RTAG_XY { @@ -274,35 +301,35 @@ pub fn read_text<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu (input, header) = RecordHeader::read(input)?; } - let pres_hori_builder = text_builder.field_builder::(2).unwrap(); + let pres_hori_builder = text_builder.field_builder::(1).unwrap(); pres_hori_builder.append_option(pres_hori); - let pres_vert_builder = text_builder.field_builder::(3).unwrap(); + let pres_vert_builder = text_builder.field_builder::(2).unwrap(); 
pres_vert_builder.append_option(pres_vert); - let pres_font_builder = text_builder.field_builder::(4).unwrap(); + let pres_font_builder = text_builder.field_builder::(3).unwrap(); pres_font_builder.append_option(pres_font); - let path_type_builder = text_builder.field_builder::(5).unwrap(); + let path_type_builder = text_builder.field_builder::(4).unwrap(); path_type_builder.append_option(path_type); - let width_builder = text_builder.field_builder::(6).unwrap(); + let width_builder = text_builder.field_builder::(5).unwrap(); width_builder.append_option(width); - let inv_builder = text_builder.field_builder::(7).unwrap(); + let inv_builder = text_builder.field_builder::(6).unwrap(); inv_builder.append_option(invert_y); - let mag_builder = text_builder.field_builder::(8).unwrap(); + let mag_builder = text_builder.field_builder::(7).unwrap(); mag_builder.append_option(mag); - let angle_builder = text_builder.field_builder::(9).unwrap(); + let angle_builder = text_builder.field_builder::(8).unwrap(); angle_builder.append_option(angle_deg); let (input, mut xy_iter) = XY::read_data(input, header.data_size)?; - let x_builder = text_builder.field_builder::(10).unwrap(); + let x_builder = text_builder.field_builder::(9).unwrap(); x_builder.append_value(xy_iter.next().unwrap()); - let y_builder = text_builder.field_builder::(11).unwrap(); + let y_builder = text_builder.field_builder::(10).unwrap(); y_builder.append_value(xy_iter.next().unwrap()); let (input, string_bytes) = STRING::read(input)?; let string = String::from_utf8(string_bytes).unwrap(); - let string_builder = text_builder.field_builder::(12).unwrap(); + let string_builder = text_builder.field_builder::(11).unwrap(); string_builder.append_value(string); - let props_builder = text_builder.field_builder::(13).unwrap(); + let props_builder = text_builder.field_builder::(12).unwrap(); let (input, ()) = read_properties(input, props_builder)?; text_builder.append(true); @@ -311,7 +338,12 @@ pub fn read_text<'a>(input: 
&'a [u8], cell_builder: &mut StructBuilder) -> IResu -pub fn read_ref<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag: u16, names: &mut HashMap) -> IResult<'a, ()> { +pub fn read_ref<'a>( + input: &'a [u8], + cell_builder: &mut StructBuilder, + tag: u16, + names: &mut HashMap, + ) -> IResult<'a, ()> { let (input, _) = match tag { records::RTAG_SREF => records::SREF::read(input)?, records::RTAG_AREF => records::AREF::read(input)?, diff --git a/src/library.rs b/src/library.rs index c2c4e13..18e5a2b 100644 --- a/src/library.rs +++ b/src/library.rs @@ -120,7 +120,6 @@ pub fn read_library(input: &[u8]) -> IResult { Field::new("count1", DataType::Int16, false), ])); - let ref_struct_t = DataType::Struct(Fields::from(vec![ Field::new("target", DataType::UInt32, false), Field::new("invert_y", DataType::Boolean, true), @@ -132,10 +131,8 @@ pub fn read_library(input: &[u8]) -> IResult { Field::new("properties", property_list_t.clone(), true), ])); - let text_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("layer", DataType::Int16, false), - Field::new("dtype", DataType::Int16, false), + Field::new("layer", DataType::UInt32, false), Field::new("presentation_horiz", DataType::UInt8, true), Field::new("presentation_vert", DataType::UInt8, true), Field::new("presentation_font", DataType::UInt8, true), @@ -156,15 +153,13 @@ pub fn read_library(input: &[u8]) -> IResult { )); let boundary_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("layer", DataType::Int16, false), - Field::new("dtype", DataType::Int16, false), + Field::new("layer", DataType::UInt32, false), Field::new("xy", coords_t.clone(), false), Field::new("properties", property_list_t.clone(), true), ])); let path_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("layer", DataType::Int16, false), - Field::new("dtype", DataType::Int16, false), + Field::new("layer", DataType::UInt32, false), Field::new("path_type", DataType::Int16, true), Field::new("extension_start", 
DataType::Int32, true), Field::new("extension_end", DataType::Int32, true), @@ -174,8 +169,7 @@ pub fn read_library(input: &[u8]) -> IResult { ])); let boxnode_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("layer", DataType::Int16, false), - Field::new("dtype", DataType::Int16, false), + Field::new("layer", DataType::UInt32, false), Field::new("xy", coords_t.clone(), false), Field::new("properties", property_list_t.clone(), true), ])); @@ -205,6 +199,10 @@ pub fn read_library(input: &[u8]) -> IResult { Field::new_list_field(DataType::Utf8, false) )); + let layer_list_t = DataType::List(Arc::new( + Field::new_list_field(DataType::UInt32, false) + )); + let time_t = DataType::FixedSizeList(Arc::new( Field::new_list_field(DataType::Int16, false), ), @@ -232,8 +230,9 @@ pub fn read_library(input: &[u8]) -> IResult { Field::new("lib_name", DataType::Utf8, false), Field::new("mod_time", time_t.clone(), false), Field::new("acc_time", time_t.clone(), false), - Field::new("cell_names", name_list_t, false), Field::new("cells", cells_list_t, false), + Field::new("cell_names", name_list_t, false), + Field::new("layers", layer_list_t, false), ], 0, ); @@ -255,7 +254,8 @@ pub fn read_library(input: &[u8]) -> IResult { atl_builder.append(true); let mut names = HashMap::::new(); - let cells_builder = lib_builder.field_builder::(6).unwrap(); + let mut layers = HashMap::::new(); + let cells_builder = lib_builder.field_builder::(5).unwrap(); let (mut input, mut header) = RecordHeader::read(input)?; while header.tag != records::RTAG_ENDLIB { @@ -277,7 +277,7 @@ pub fn read_library(input: &[u8]) -> IResult { let offset_builder = cell_builder.field_builder::(1).unwrap(); offset_builder.append_value(position.try_into().unwrap()); - (input, _) = read_elements(input, cell_builder, &mut names)?; + (input, _) = read_elements(input, cell_builder, &mut names, &mut layers)?; cell_builder.append(true); } @@ -286,13 +286,21 @@ pub fn read_library(input: &[u8]) -> IResult { 
cells_builder.append(true); let mut ids: HashMap = names.into_iter().map(|(kk, vv)| (vv, kk)).collect(); - let names_builder = lib_builder.field_builder::(5).unwrap(); + let names_builder = lib_builder.field_builder::(6).unwrap(); let name_builder = names_builder.values().as_any_mut().downcast_mut::().unwrap(); for id in 0..ids.len() { name_builder.append_value(ids.remove(&id.try_into().unwrap()).unwrap()); } names_builder.append(true); + let mut layer_ids: HashMap = layers.into_iter().map(|(kk, vv)| (vv, kk)).collect(); + let layers_builder = lib_builder.field_builder::(7).unwrap(); + let layer_builder = layers_builder.values().as_any_mut().downcast_mut::().unwrap(); + for layer_id in 0..layer_ids.len() { + layer_builder.append_value(layer_ids.remove(&layer_id.try_into().unwrap()).unwrap()); + } + layers_builder.append(true); + lib_builder.append(true); let lib = lib_builder.finish(); Ok((input, lib)) From 55761d2e54298959535cda944518e83be8384b70 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Tue, 22 Apr 2025 20:14:51 -0700 Subject: [PATCH 25/31] propvalue might be any size --- src/records.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/records.rs b/src/records.rs index 5a7e2cc..27f7eaf 100644 --- a/src/records.rs +++ b/src/records.rs @@ -392,7 +392,7 @@ impl Record for PROPATTR { pub struct PROPVALUE; impl Record for PROPVALUE { fn tag() -> u16 { RTAG_PROPVALUE } - fn expected_size() -> Option { Some(2) } + fn expected_size() -> Option { None } } pub struct BOX; From 9dbfb5e4ec2df57f914912efcb92907e1ad65f06 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Tue, 22 Apr 2025 20:14:59 -0700 Subject: [PATCH 26/31] style --- src/basic.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/basic.rs b/src/basic.rs index 47e2427..8bad4e7 100644 --- a/src/basic.rs +++ b/src/basic.rs @@ -88,8 +88,8 @@ pub fn parse_datetime(input: &[u8]) -> IResult<[i16; 6]> { pub fn parse_bitarray(input: &[u8]) -> IResult<[bool; 
16]> { let mut bits = [false; 16]; let (input, val) = parse_int2(input)?; - for ii in 0..16 { - bits[ii] = ((val >> (16 - 1 - ii)) & 0x01) == 1; + for (ii, bit) in bits.iter_mut().enumerate() { + *bit = ((val >> (16 - 1 - ii)) & 0x01) == 1; } Ok((input, bits)) } @@ -111,8 +111,8 @@ pub fn parse_ascii(input: &[u8], length: u16) -> IResult> { pub fn bitarray2int(bits: &[bool; 16]) -> u16 { let mut int: u16 = 0; - for ii in 0..16 { - int |= (bits[ii] as u16) << (16 - 1 - ii); + for (ii, bit) in bits.iter().enumerate() { + int |= (*bit as u16) << (16 - 1 - ii); } int } From 78d1d6e7de7fb3c94d1a816e3d7a77e105ab083a Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Tue, 22 Apr 2025 20:16:38 -0700 Subject: [PATCH 27/31] lump together x and y, and counts --- src/elements.rs | 77 ++++++++++++++++++++++--------------------------- src/library.rs | 15 ++++------ 2 files changed, 39 insertions(+), 53 deletions(-) diff --git a/src/elements.rs b/src/elements.rs index 60aeeb2..7601a63 100644 --- a/src/elements.rs +++ b/src/elements.rs @@ -21,7 +21,7 @@ use std::collections::HashMap; use arrow::array::{ StructBuilder, FixedSizeListBuilder, ListBuilder, StringBuilder, ArrayBuilder, BooleanBuilder, - Int32Builder, Int16Builder, UInt32Builder, UInt8Builder, Float64Builder, + Int32Builder, Int16Builder, UInt64Builder, UInt32Builder, UInt8Builder, Float64Builder, }; @@ -319,17 +319,18 @@ pub fn read_text<'a>( angle_builder.append_option(angle_deg); let (input, mut xy_iter) = XY::read_data(input, header.data_size)?; - let x_builder = text_builder.field_builder::(9).unwrap(); - x_builder.append_value(xy_iter.next().unwrap()); - let y_builder = text_builder.field_builder::(10).unwrap(); - y_builder.append_value(xy_iter.next().unwrap()); + let xx = xy_iter.next().unwrap(); + let yy = xy_iter.next().unwrap(); + let xy = ((xx as u32 as u64) << 32) | (yy as u32 as u64); + let xy_builder = text_builder.field_builder::(9).unwrap(); + xy_builder.append_value(xy); let (input, string_bytes) = 
STRING::read(input)?; let string = String::from_utf8(string_bytes).unwrap(); - let string_builder = text_builder.field_builder::(11).unwrap(); + let string_builder = text_builder.field_builder::(10).unwrap(); string_builder.append_value(string); - let props_builder = text_builder.field_builder::(12).unwrap(); + let props_builder = text_builder.field_builder::(11).unwrap(); let (input, ()) = read_properties(input, props_builder)?; text_builder.append(true); @@ -406,57 +407,47 @@ pub fn read_ref<'a>( let (input, mut xy_iter) = XY::read_data(input, header.data_size)?; - let x = xy_iter.next().unwrap(); - let y = xy_iter.next().unwrap(); - let x_builder = ref_builder.field_builder::(4).unwrap(); - x_builder.append_value(x); - let y_builder = ref_builder.field_builder::(5).unwrap(); - y_builder.append_value(y); + let xx = xy_iter.next().unwrap(); + let yy = xy_iter.next().unwrap(); + let xy = ((xx as u32 as u64) << 32) | (yy as u32 as u64); + let xy_builder = ref_builder.field_builder::(4).unwrap(); + xy_builder.append_value(xy); - let rep_builder = ref_builder.field_builder::(6).unwrap(); println!("ref, {is_aref:?}"); + let rep_builder = ref_builder.field_builder::(5).unwrap(); if is_aref { if colrow.is_none() { return fail(input, "AREF without COLROW before XY".to_string()) } let (count0, count1) = colrow.unwrap(); - let x0a = (xy_iter.next().unwrap() - x) / (count0 as i32); - let y0a = (xy_iter.next().unwrap() - y) / (count0 as i32); - let x1a = (xy_iter.next().unwrap() - x) / (count1 as i32); - let y1a = (xy_iter.next().unwrap() - y) / (count1 as i32); + let x0a = (xy_iter.next().unwrap() - xx) / (count0 as i32); + let y0a = (xy_iter.next().unwrap() - yy) / (count0 as i32); + let x1a = (xy_iter.next().unwrap() - xx) / (count1 as i32); + let y1a = (xy_iter.next().unwrap() - yy) / (count1 as i32); - let x0_builder = rep_builder.field_builder::(0).unwrap(); - x0_builder.append_value(x0a); - let y0_builder = rep_builder.field_builder::(1).unwrap(); - 
y0_builder.append_value(y0a); - let x1_builder = rep_builder.field_builder::(2).unwrap(); - x1_builder.append_value(x1a); - let y1_builder = rep_builder.field_builder::(3).unwrap(); - y1_builder.append_value(y1a); + let xy0a = ((x0a as u32 as u64) << 32) | (y0a as u32 as u64); + let xy1a = ((x1a as u32 as u64) << 32) | (y1a as u32 as u64); + let xy0_builder = rep_builder.field_builder::(0).unwrap(); + xy0_builder.append_value(xy0a); + let xy1_builder = rep_builder.field_builder::(1).unwrap(); + xy1_builder.append_value(xy1a); - let count0_builder = rep_builder.field_builder::(4).unwrap(); - count0_builder.append_value(count0); - let count1_builder = rep_builder.field_builder::(5).unwrap(); - count1_builder.append_value(count1); + let counts = ((count0 as u16 as u32) << 16) | (count1 as u16 as u32); + let counts_builder = rep_builder.field_builder::(2).unwrap(); + counts_builder.append_value(counts); } else { - let x0_builder = rep_builder.field_builder::(0).unwrap(); - x0_builder.append_null(); - let y0_builder = rep_builder.field_builder::(1).unwrap(); - y0_builder.append_null(); - let x1_builder = rep_builder.field_builder::(2).unwrap(); - x1_builder.append_null(); - let y1_builder = rep_builder.field_builder::(3).unwrap(); - y1_builder.append_null(); - let count0_builder = rep_builder.field_builder::(4).unwrap(); - count0_builder.append_null(); - let count1_builder = rep_builder.field_builder::(5).unwrap(); - count1_builder.append_null(); + let xy0_builder = rep_builder.field_builder::(0).unwrap(); + xy0_builder.append_null(); + let xy1_builder = rep_builder.field_builder::(1).unwrap(); + xy1_builder.append_null(); + let counts_builder = rep_builder.field_builder::(2).unwrap(); + counts_builder.append_null(); } rep_builder.append(is_aref); - let props_builder = ref_builder.field_builder::(7).unwrap(); + let props_builder = ref_builder.field_builder::(6).unwrap(); let (input, ()) = read_properties(input, props_builder)?; ref_builder.append(true); diff --git 
a/src/library.rs b/src/library.rs index 18e5a2b..a9b4855 100644 --- a/src/library.rs +++ b/src/library.rs @@ -112,12 +112,9 @@ pub fn read_library(input: &[u8]) -> IResult { let repetition_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("x0", DataType::Int32, false), - Field::new("y0", DataType::Int32, false), - Field::new("x1", DataType::Int32, false), - Field::new("y1", DataType::Int32, false), - Field::new("count0", DataType::Int16, false), - Field::new("count1", DataType::Int16, false), + Field::new("xy0", DataType::UInt64, false), + Field::new("xy1", DataType::UInt64, false), + Field::new("counts", DataType::UInt32, false), ])); let ref_struct_t = DataType::Struct(Fields::from(vec![ @@ -125,8 +122,7 @@ pub fn read_library(input: &[u8]) -> IResult { Field::new("invert_y", DataType::Boolean, true), Field::new("mag", DataType::Float64, true), Field::new("angle_deg", DataType::Float64, true), - Field::new("x", DataType::Int32, false), - Field::new("y", DataType::Int32, false), + Field::new("xy", DataType::UInt64, false), Field::new("repetition", repetition_struct_t, true), Field::new("properties", property_list_t.clone(), true), ])); @@ -141,8 +137,7 @@ pub fn read_library(input: &[u8]) -> IResult { Field::new("invert_y", DataType::Boolean, true), Field::new("mag", DataType::Float64, true), Field::new("angle_deg", DataType::Float64, true), - Field::new("x", DataType::Int32, false), - Field::new("y", DataType::Int32, false), + Field::new("xy", DataType::UInt64, false), Field::new("string", DataType::Utf8, false), Field::new("properties", property_list_t.clone(), true), ])); From 029662d887095c67100dde8b853a0b56bc15b3f0 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Tue, 22 Apr 2025 20:17:17 -0700 Subject: [PATCH 28/31] cleanup --- src/elements.rs | 7 +------ src/library.rs | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/elements.rs b/src/elements.rs index 7601a63..4dd8969 100644 --- a/src/elements.rs +++ b/src/elements.rs 
@@ -131,11 +131,6 @@ pub fn read_path<'a>( let (input, layer) = LAYER::skip_and_read(input)?; let (input, dtype) = DATATYPE::read(input)?; insert_layer(layer, dtype, layers, path_builder, 0); - //let layer32 = (layer as u16 as u32) << 16) | (dtype as u16 as u32) - //let next_id = layers.len(); - //let id = layers.entry(layer32).or_insert(next_id.try_into().unwrap()); - //let layer_builder = path_builder.field_builder::(0).unwrap(); - //layer_builder.append_value((id); let mut path_type = None; let mut width = None; @@ -413,8 +408,8 @@ pub fn read_ref<'a>( let xy_builder = ref_builder.field_builder::(4).unwrap(); xy_builder.append_value(xy); - println!("ref, {is_aref:?}"); let rep_builder = ref_builder.field_builder::(5).unwrap(); +// println!("ref, {is_aref:?}"); if is_aref { if colrow.is_none() { return fail(input, "AREF without COLROW before XY".to_string()) diff --git a/src/library.rs b/src/library.rs index a9b4855..abe4ca4 100644 --- a/src/library.rs +++ b/src/library.rs @@ -259,7 +259,7 @@ pub fn read_library(input: &[u8]) -> IResult { let name_bytes; (input, name_bytes) = records::STRNAME::read(input)?; let name = String::from_utf8(name_bytes).unwrap(); - println!("{name}"); + //println!("{name}"); let next_id = names.len(); let id = names.entry(name).or_insert(next_id.try_into().unwrap()); From 572b39aca080dd096d657e99a4ded664cb74d406 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Tue, 22 Apr 2025 20:17:23 -0700 Subject: [PATCH 29/31] enable lto --- Cargo.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index aa0b4a8..87243ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,3 +13,7 @@ crate-type = ["cdylib", "rlib"] [dependencies] byteorder = "^1" arrow = {version = "^54", features = ["ffi"]} + + +[profile.release] +lto = true From 22040d9432d6535f2a92b321cb591a473b0b8ffb Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Thu, 2 Apr 2026 20:22:15 -0700 Subject: [PATCH 30/31] performance work --- src/elements.rs | 598 
+++++++++++++++++++++++++++++++++++++++++------- src/library.rs | 91 ++++++-- 2 files changed, 592 insertions(+), 97 deletions(-) diff --git a/src/elements.rs b/src/elements.rs index 4dd8969..f44059c 100644 --- a/src/elements.rs +++ b/src/elements.rs @@ -28,6 +28,68 @@ use arrow::array::{ pub type DListBuilder = ListBuilder>; pub type FListBuilder = FixedSizeListBuilder>; +struct PropertyRecord { + key: i16, + value: String, +} + +struct BoundaryRecord { + layer_id: u32, + vertices: Vec, + properties: Vec, +} + +struct BoundaryBatchRecord { + layer_id: u32, + vertices: Vec, + vertex_offsets: Vec, +} + +struct RectBatchRecord { + layer_id: u32, + rects: Vec, +} + +struct SRefRecord { + target_id: u32, + invert_y: bool, + scale: f64, + angle_rad: f64, + xy: u64, + properties: Vec, +} + +struct ARefRecord { + target_id: u32, + invert_y: bool, + scale: f64, + angle_rad: f64, + xy: u64, + xy0: u64, + xy1: u64, + counts: u32, + properties: Vec, +} + +enum RefRecord { + SRef(SRefRecord), + ARef(ARefRecord), +} + + +const UNIT_COUNTS: u32 = (1_u32 << 16) | 1_u32; + + +fn layer_id_for( + layer: i16, + dtype: i16, + layers: &mut HashMap, + ) -> u32 { + let layer32 = ((layer as u16 as u32) << 16) | (dtype as u16 as u32); + let next_id = layers.len(); + *layers.entry(layer32).or_insert(next_id.try_into().unwrap()) +} + fn insert_layer( layer: i16, @@ -36,11 +98,9 @@ fn insert_layer( struct_builder: &mut StructBuilder, field_index: usize, ) { - let layer32 = ((layer as u16 as u32) << 16) | (dtype as u16 as u32); - let next_id = layers.len(); - let id = layers.entry(layer32).or_insert(next_id.try_into().unwrap()); + let id = layer_id_for(layer, dtype, layers); let layer_builder = struct_builder.field_builder::(field_index).unwrap(); - layer_builder.append_value(*id); + layer_builder.append_value(id); } @@ -51,12 +111,25 @@ pub fn read_elements<'a>( layers: &mut HashMap, ) -> IResult<'a, ()> { let mut input = input; + let mut boundaries = Vec::::new(); + let mut srefs = 
Vec::::new(); + let mut arefs = Vec::::new(); let (_, mut header) = RecordHeader::read(input)?; // don't consume tag while header.tag != records::RTAG_ENDSTR { (input, _) = match header.tag { - records::RTAG_SREF => read_ref(input, cell_builder, header.tag, names)?, - records::RTAG_AREF => read_ref(input, cell_builder, header.tag, names)?, - records::RTAG_BOUNDARY => read_boundary(input, cell_builder, layers)?, + records::RTAG_SREF | records::RTAG_AREF => { + let (next_input, ref_record) = read_ref(input, header.tag, names)?; + match ref_record { + RefRecord::SRef(sref) => srefs.push(sref), + RefRecord::ARef(aref) => arefs.push(aref), + } + (next_input, ()) + }, + records::RTAG_BOUNDARY => { + let (next_input, boundary) = read_boundary(input, layers)?; + boundaries.push(boundary); + (next_input, ()) + }, records::RTAG_PATH => read_path(input, cell_builder, layers)?, records::RTAG_NODE => read_boxnode(input, cell_builder, header.tag, layers)?, records::RTAG_BOX => read_boxnode(input, cell_builder, header.tag, layers)?, @@ -71,50 +144,363 @@ pub fn read_elements<'a>( } (input, _) = take_bytes(input, 4_usize)?; // consume endstr tag - let refs_builder = cell_builder.field_builder::(2).unwrap(); - refs_builder.append(true); - let boundaries_builder = cell_builder.field_builder::(3).unwrap(); - boundaries_builder.append(true); - let paths_builder = cell_builder.field_builder::(4).unwrap(); + let mut sref_plain = Vec::::with_capacity(srefs.len() + arefs.len() / 4); + let mut sref_props = Vec::::with_capacity(srefs.len() / 16 + arefs.len() / 64); + for sref in srefs { + if sref.properties.is_empty() { + sref_plain.push(sref); + } else { + sref_props.push(sref); + } + } + + let mut aref_plain = Vec::::with_capacity(arefs.len()); + let mut aref_props = Vec::::with_capacity(arefs.len() / 16); + for aref in arefs { + if aref.counts == UNIT_COUNTS { + let sref = aref_to_sref(aref); + if sref.properties.is_empty() { + sref_plain.push(sref); + } else { + sref_props.push(sref); + 
} + } else if aref.properties.is_empty() { + aref_plain.push(aref); + } else { + aref_props.push(aref); + } + } + sref_plain.sort_by_key(|sref| sref.target_id); + aref_plain.sort_by_key(|aref| aref.target_id); + + let mut boundary_plain = Vec::::with_capacity(boundaries.len()); + let mut boundary_props = Vec::::with_capacity(boundaries.len() / 16); + for boundary in boundaries { + if boundary.properties.is_empty() { + boundary_plain.push(boundary); + } else { + boundary_props.push(boundary); + } + } + boundary_plain.sort_by_key(|boundary| boundary.layer_id); + + let mut rect_batches = Vec::::new(); + let mut boundary_batches = Vec::::new(); + let mut ii = 0; + while ii < boundary_plain.len() { + let start = ii; + let layer_id = boundary_plain[ii].layer_id; + while ii < boundary_plain.len() && boundary_plain[ii].layer_id == layer_id { + ii += 1; + } + + let group = &boundary_plain[start..ii]; + let rect_capacity = group.len() * 4; + let vertex_capacity = group.iter().map(|boundary| boundary.vertices.len()).sum(); + let mut rect_batch = RectBatchRecord { + layer_id, + rects: Vec::with_capacity(rect_capacity), + }; + let mut boundary_batch = BoundaryBatchRecord { + layer_id, + vertices: Vec::with_capacity(vertex_capacity), + vertex_offsets: Vec::with_capacity(group.len()), + }; + + let mut next_offset: u32 = 0; + for boundary in group { + if let Some(rect) = boundary_to_rect(&boundary.vertices) { + rect_batch.rects.extend_from_slice(&rect); + } else { + boundary_batch.vertex_offsets.push(next_offset); + boundary_batch.vertices.extend_from_slice(&boundary.vertices); + next_offset += (boundary.vertices.len() / 2) as u32; + } + } + if !rect_batch.rects.is_empty() { + rect_batches.push(rect_batch); + } + if !boundary_batch.vertex_offsets.is_empty() { + boundary_batches.push(boundary_batch); + } + } + + let sref_builder = cell_builder.field_builder::(2).unwrap(); + for sref in sref_plain { + append_sref(sref_builder, sref); + } + sref_builder.append(true); + + let 
aref_builder = cell_builder.field_builder::(3).unwrap(); + for aref in aref_plain { + append_aref(aref_builder, aref); + } + aref_builder.append(true); + + let sref_props_builder = cell_builder.field_builder::(4).unwrap(); + for sref in sref_props { + append_sref_prop(sref_props_builder, sref); + } + sref_props_builder.append(true); + + let aref_props_builder = cell_builder.field_builder::(5).unwrap(); + for aref in aref_props { + append_aref_prop(aref_props_builder, aref); + } + aref_props_builder.append(true); + + let rect_batches_builder = cell_builder.field_builder::(6).unwrap(); + for rect_batch in rect_batches { + append_rect_batch(rect_batches_builder, rect_batch); + } + rect_batches_builder.append(true); + + let boundary_batches_builder = cell_builder.field_builder::(7).unwrap(); + for boundary_batch in boundary_batches { + append_boundary_batch(boundary_batches_builder, boundary_batch); + } + boundary_batches_builder.append(true); + + let boundary_props_builder = cell_builder.field_builder::(8).unwrap(); + for boundary in boundary_props { + append_boundary_prop(boundary_props_builder, boundary); + } + boundary_props_builder.append(true); + + let paths_builder = cell_builder.field_builder::(9).unwrap(); paths_builder.append(true); - let nodes_builder = cell_builder.field_builder::(5).unwrap(); + let nodes_builder = cell_builder.field_builder::(10).unwrap(); nodes_builder.append(true); - let boxes_builder = cell_builder.field_builder::(6).unwrap(); + let boxes_builder = cell_builder.field_builder::(11).unwrap(); boxes_builder.append(true); - let texts_builder = cell_builder.field_builder::(7).unwrap(); + let texts_builder = cell_builder.field_builder::(12).unwrap(); texts_builder.append(true); Ok((input, ())) } -pub fn read_boundary<'a>( - input: &'a [u8], - cell_builder: &mut StructBuilder, - layers: &mut HashMap, - ) -> IResult<'a, ()> { - let boundaries_builder = cell_builder.field_builder::(3).unwrap(); - let boundary_builder = 
boundaries_builder.values().as_any_mut().downcast_mut::().unwrap(); +fn append_sref( + sref_builder: &mut DListBuilder, + sref: SRefRecord, + ) { + let sref_struct_builder = sref_builder.values().as_any_mut().downcast_mut::().unwrap(); + let target_builder = sref_struct_builder.field_builder::(0).unwrap(); + target_builder.append_value(sref.target_id); + let invert_builder = sref_struct_builder.field_builder::(1).unwrap(); + invert_builder.append_value(sref.invert_y); + let scale_builder = sref_struct_builder.field_builder::(2).unwrap(); + scale_builder.append_value(sref.scale); + let angle_builder = sref_struct_builder.field_builder::(3).unwrap(); + angle_builder.append_value(sref.angle_rad); + let xy_builder = sref_struct_builder.field_builder::(4).unwrap(); + xy_builder.append_value(sref.xy); + + sref_struct_builder.append(true); +} + + +fn append_aref( + aref_builder: &mut DListBuilder, + aref: ARefRecord, + ) { + let aref_struct_builder = aref_builder.values().as_any_mut().downcast_mut::().unwrap(); + + let target_builder = aref_struct_builder.field_builder::(0).unwrap(); + target_builder.append_value(aref.target_id); + let invert_builder = aref_struct_builder.field_builder::(1).unwrap(); + invert_builder.append_value(aref.invert_y); + let scale_builder = aref_struct_builder.field_builder::(2).unwrap(); + scale_builder.append_value(aref.scale); + let angle_builder = aref_struct_builder.field_builder::(3).unwrap(); + angle_builder.append_value(aref.angle_rad); + let xy_builder = aref_struct_builder.field_builder::(4).unwrap(); + xy_builder.append_value(aref.xy); + let xy0_builder = aref_struct_builder.field_builder::(5).unwrap(); + xy0_builder.append_value(aref.xy0); + let xy1_builder = aref_struct_builder.field_builder::(6).unwrap(); + xy1_builder.append_value(aref.xy1); + let counts_builder = aref_struct_builder.field_builder::(7).unwrap(); + counts_builder.append_value(aref.counts); + + aref_struct_builder.append(true); +} + + +fn append_properties( + 
props_builder: &mut DListBuilder, + properties: Vec, + ) { + for prop in properties { + let prop_builder = props_builder.values().as_any_mut().downcast_mut::().unwrap(); + let key_builder = prop_builder.field_builder::(0).unwrap(); + key_builder.append_value(prop.key); + + let val_builder = prop_builder.field_builder::(1).unwrap(); + val_builder.append_value(prop.value); + + prop_builder.append(true); + } + props_builder.append(true); +} + + +fn append_sref_prop( + sref_props_builder: &mut DListBuilder, + sref: SRefRecord, + ) { + let sref_prop_builder = sref_props_builder.values().as_any_mut().downcast_mut::().unwrap(); + + let target_builder = sref_prop_builder.field_builder::(0).unwrap(); + target_builder.append_value(sref.target_id); + let invert_builder = sref_prop_builder.field_builder::(1).unwrap(); + invert_builder.append_value(sref.invert_y); + let scale_builder = sref_prop_builder.field_builder::(2).unwrap(); + scale_builder.append_value(sref.scale); + let angle_builder = sref_prop_builder.field_builder::(3).unwrap(); + angle_builder.append_value(sref.angle_rad); + let xy_builder = sref_prop_builder.field_builder::(4).unwrap(); + xy_builder.append_value(sref.xy); + + let props_builder = sref_prop_builder.field_builder::(5).unwrap(); + append_properties(props_builder, sref.properties); + + sref_prop_builder.append(true); +} + + +fn append_aref_prop( + aref_props_builder: &mut DListBuilder, + aref: ARefRecord, + ) { + let aref_prop_builder = aref_props_builder.values().as_any_mut().downcast_mut::().unwrap(); + + let target_builder = aref_prop_builder.field_builder::(0).unwrap(); + target_builder.append_value(aref.target_id); + let invert_builder = aref_prop_builder.field_builder::(1).unwrap(); + invert_builder.append_value(aref.invert_y); + let scale_builder = aref_prop_builder.field_builder::(2).unwrap(); + scale_builder.append_value(aref.scale); + let angle_builder = aref_prop_builder.field_builder::(3).unwrap(); + 
angle_builder.append_value(aref.angle_rad); + let xy_builder = aref_prop_builder.field_builder::(4).unwrap(); + xy_builder.append_value(aref.xy); + let xy0_builder = aref_prop_builder.field_builder::(5).unwrap(); + xy0_builder.append_value(aref.xy0); + let xy1_builder = aref_prop_builder.field_builder::(6).unwrap(); + xy1_builder.append_value(aref.xy1); + let counts_builder = aref_prop_builder.field_builder::(7).unwrap(); + counts_builder.append_value(aref.counts); + + let props_builder = aref_prop_builder.field_builder::(8).unwrap(); + append_properties(props_builder, aref.properties); + + aref_prop_builder.append(true); +} + + +fn aref_to_sref(aref: ARefRecord) -> SRefRecord { + SRefRecord { + target_id: aref.target_id, + invert_y: aref.invert_y, + scale: aref.scale, + angle_rad: aref.angle_rad, + xy: aref.xy, + properties: aref.properties, + } +} + + +fn read_boundary<'a>( + input: &'a [u8], + layers: &mut HashMap, + ) -> IResult<'a, BoundaryRecord> { let (input, _) = records::BOUNDARY::read(input)?; let (input, layer) = LAYER::skip_and_read(input)?; let (input, dtype) = DATATYPE::read(input)?; - insert_layer(layer, dtype, layers, boundary_builder, 0); + let layer_id = layer_id_for(layer, dtype, layers); - let xys_builder = boundary_builder.field_builder::(1).unwrap(); - let xy_builder = xys_builder.values().as_any_mut().downcast_mut::().unwrap(); let (input, xy_iter) = XY::read(input)?; - for xy in xy_iter { - xy_builder.append_value(xy); + let mut vertices: Vec = xy_iter.collect(); + vertices.truncate(vertices.len().saturating_sub(2)); + + let (input, properties) = read_properties_vec(input)?; + + Ok((input, BoundaryRecord { layer_id, vertices, properties })) +} + + +fn append_boundary_batch( + boundary_batches_builder: &mut DListBuilder, + boundary_batch: BoundaryBatchRecord, + ) { + let boundary_batch_builder = boundary_batches_builder.values().as_any_mut().downcast_mut::().unwrap(); + + let layer_builder = boundary_batch_builder.field_builder::(0).unwrap(); 
+ layer_builder.append_value(boundary_batch.layer_id); + + let vertices_builder = boundary_batch_builder.field_builder::(1).unwrap(); + let vertex_builder = vertices_builder.values().as_any_mut().downcast_mut::().unwrap(); + vertex_builder.append_slice(&boundary_batch.vertices); + vertices_builder.append(true); + + let offsets_builder = boundary_batch_builder.field_builder::(2).unwrap(); + let offset_builder = offsets_builder.values().as_any_mut().downcast_mut::().unwrap(); + offset_builder.append_slice(&boundary_batch.vertex_offsets); + offsets_builder.append(true); + + boundary_batch_builder.append(true); +} + + +fn append_rect_batch( + rect_batches_builder: &mut DListBuilder, + rect_batch: RectBatchRecord, + ) { + let rect_batch_builder = rect_batches_builder.values().as_any_mut().downcast_mut::().unwrap(); + + let layer_builder = rect_batch_builder.field_builder::(0).unwrap(); + layer_builder.append_value(rect_batch.layer_id); + + let rects_builder = rect_batch_builder.field_builder::(1).unwrap(); + let rect_builder = rects_builder.values().as_any_mut().downcast_mut::().unwrap(); + rect_builder.append_slice(&rect_batch.rects); + rects_builder.append(true); + + rect_batch_builder.append(true); +} + + +fn append_boundary_prop( + boundary_props_builder: &mut DListBuilder, + boundary: BoundaryRecord, + ) { + let boundary_prop_builder = boundary_props_builder.values().as_any_mut().downcast_mut::().unwrap(); + + let layer_builder = boundary_prop_builder.field_builder::(0).unwrap(); + layer_builder.append_value(boundary.layer_id); + + let vertices_builder = boundary_prop_builder.field_builder::(1).unwrap(); + let vertex_builder = vertices_builder.values().as_any_mut().downcast_mut::().unwrap(); + vertex_builder.append_slice(&boundary.vertices); + vertices_builder.append(true); + + let props_builder = boundary_prop_builder.field_builder::(2).unwrap(); + for prop in boundary.properties { + let prop_builder = props_builder.values().as_any_mut().downcast_mut::().unwrap(); 
+ let key_builder = prop_builder.field_builder::(0).unwrap(); + key_builder.append_value(prop.key); + + let val_builder = prop_builder.field_builder::(1).unwrap(); + val_builder.append_value(prop.value); + + prop_builder.append(true); } - xys_builder.append(true); + props_builder.append(true); - let props_builder = boundary_builder.field_builder::(2).unwrap(); - let (input, ()) = read_properties(input, props_builder)?; - - boundary_builder.append(true); - Ok((input, ())) + boundary_prop_builder.append(true); } @@ -123,7 +509,7 @@ pub fn read_path<'a>( cell_builder: &mut StructBuilder, layers: &mut HashMap, ) -> IResult<'a, ()> { - let paths_builder = cell_builder.field_builder::(4).unwrap(); + let paths_builder = cell_builder.field_builder::(9).unwrap(); let path_builder = paths_builder.values().as_any_mut().downcast_mut::().unwrap(); let (input, _) = records::PATH::read(input)?; @@ -196,8 +582,8 @@ pub fn read_boxnode<'a>( layers: &mut HashMap, ) -> IResult<'a, ()> { let field_num = match tag { - records::RTAG_NODE => 5, - records::RTAG_BOX => 6, + records::RTAG_NODE => 10, + records::RTAG_BOX => 11, _ => return fail(input, format!("Unexpected tag {:04x}", tag)), }; @@ -238,7 +624,7 @@ pub fn read_text<'a>( cell_builder: &mut StructBuilder, layers: &mut HashMap, ) -> IResult<'a, ()> { - let texts_builder = cell_builder.field_builder::(7).unwrap(); + let texts_builder = cell_builder.field_builder::(12).unwrap(); let text_builder = texts_builder.values().as_any_mut().downcast_mut::().unwrap(); let mut path_type = None; @@ -333,13 +719,53 @@ pub fn read_text<'a>( } +fn boundary_to_rect(vertices: &[i32]) -> Option<[i32; 4]> { + if vertices.len() != 8 { + return None; + } -pub fn read_ref<'a>( + let xs = [vertices[0], vertices[2], vertices[4], vertices[6]]; + let ys = [vertices[1], vertices[3], vertices[5], vertices[7]]; + + let min_x = *xs.iter().min().unwrap(); + let max_x = *xs.iter().max().unwrap(); + let min_y = *ys.iter().min().unwrap(); + let max_y = 
*ys.iter().max().unwrap(); + if min_x >= max_x || min_y >= max_y { + return None; + } + + let mut seen_corners: u8 = 0; + for ii in 0..4 { + let jj = (ii + 1) % 4; + let dx = xs[jj] - xs[ii]; + let dy = ys[jj] - ys[ii]; + if (dx == 0) == (dy == 0) { + return None; + } + + let x = xs[ii]; + let y = ys[ii]; + if (x != min_x && x != max_x) || (y != min_y && y != max_y) { + return None; + } + let code = ((x == max_x) as u8) << 1 | ((y == max_y) as u8); + seen_corners |= 1 << code; + } + + if seen_corners != 0b1111 { + return None; + } + Some([min_x, min_y, max_x, max_y]) +} + + + +fn read_ref<'a>( input: &'a [u8], - cell_builder: &mut StructBuilder, tag: u16, names: &mut HashMap, - ) -> IResult<'a, ()> { + ) -> IResult<'a, RefRecord> { let (input, _) = match tag { records::RTAG_SREF => records::SREF::read(input)?, records::RTAG_AREF => records::AREF::read(input)?, @@ -347,20 +773,15 @@ pub fn read_ref<'a>( }; let is_aref = tag == records::RTAG_AREF; - let refs_builder = cell_builder.field_builder::(2).unwrap(); - let ref_builder = refs_builder.values().as_any_mut().downcast_mut::().unwrap(); - - let mut invert_y = None; - let mut mag = None; - let mut angle_deg = None; + let mut invert_y = false; + let mut scale = 1.0; + let mut angle_rad = 0.0; let mut colrow = None; let (input, struct_name_bytes) = SNAME::skip_and_read(input)?; let struct_name = String::from_utf8(struct_name_bytes).unwrap(); let next_id = names.len(); let id = names.entry(struct_name).or_insert(next_id.try_into().unwrap()); - let target_builder = ref_builder.field_builder::(0).unwrap(); - target_builder.append_value(*id); let (mut input, mut header) = RecordHeader::read(input)?; while header.tag != records::RTAG_XY { @@ -368,17 +789,17 @@ pub fn read_ref<'a>( records::RTAG_STRANS => { let strans; (input, strans) = STRANS::read_data(input, header.data_size)?; - invert_y = Some(strans[0]); + invert_y = strans[0]; }, records::RTAG_MAG => { let _mag; (input, _mag) = MAG::read_data(input, 
header.data_size)?; - mag = Some(_mag); + scale = _mag; }, records::RTAG_ANGLE => { let _angle_deg; (input, _angle_deg) = ANGLE::read_data(input, header.data_size)?; - angle_deg = Some(_angle_deg); + angle_rad = _angle_deg.to_radians(); }, records::RTAG_COLROW => { let mut _colrow; @@ -393,23 +814,13 @@ pub fn read_ref<'a>( }; (input, header) = RecordHeader::read(input)?; } - let inv_builder = ref_builder.field_builder::(1).unwrap(); - inv_builder.append_option(invert_y); - let mag_builder = ref_builder.field_builder::(2).unwrap(); - mag_builder.append_option(mag); - let angle_builder = ref_builder.field_builder::(3).unwrap(); - angle_builder.append_option(angle_deg); - let (input, mut xy_iter) = XY::read_data(input, header.data_size)?; let xx = xy_iter.next().unwrap(); let yy = xy_iter.next().unwrap(); let xy = ((xx as u32 as u64) << 32) | (yy as u32 as u64); - let xy_builder = ref_builder.field_builder::(4).unwrap(); - xy_builder.append_value(xy); + let (input, properties) = read_properties_vec(input)?; - let rep_builder = ref_builder.field_builder::(5).unwrap(); -// println!("ref, {is_aref:?}"); if is_aref { if colrow.is_none() { return fail(input, "AREF without COLROW before XY".to_string()) @@ -423,30 +834,28 @@ pub fn read_ref<'a>( let xy0a = ((x0a as u32 as u64) << 32) | (y0a as u32 as u64); let xy1a = ((x1a as u32 as u64) << 32) | (y1a as u32 as u64); - let xy0_builder = rep_builder.field_builder::(0).unwrap(); - xy0_builder.append_value(xy0a); - let xy1_builder = rep_builder.field_builder::(1).unwrap(); - xy1_builder.append_value(xy1a); - let counts = ((count0 as u16 as u32) << 16) | (count1 as u16 as u32); - let counts_builder = rep_builder.field_builder::(2).unwrap(); - counts_builder.append_value(counts); - + Ok((input, RefRecord::ARef(ARefRecord { + target_id: *id, + invert_y, + scale, + angle_rad, + xy, + xy0: xy0a, + xy1: xy1a, + counts, + properties, + }))) } else { - let xy0_builder = rep_builder.field_builder::(0).unwrap(); - 
xy0_builder.append_null(); - let xy1_builder = rep_builder.field_builder::(1).unwrap(); - xy1_builder.append_null(); - let counts_builder = rep_builder.field_builder::(2).unwrap(); - counts_builder.append_null(); + Ok((input, RefRecord::SRef(SRefRecord { + target_id: *id, + invert_y, + scale, + angle_rad, + xy, + properties, + }))) } - rep_builder.append(is_aref); - - let props_builder = ref_builder.field_builder::(6).unwrap(); - let (input, ()) = read_properties(input, props_builder)?; - - ref_builder.append(true); - Ok((input, ())) } @@ -490,6 +899,29 @@ pub fn read_properties<'a>(input: &'a [u8], props_builder: &mut DListBuilder) -> Ok((input, ())) } + +fn read_properties_vec<'a>(input: &'a [u8]) -> IResult<'a, Vec> { + let mut properties = Vec::::new(); + + let (mut input, mut header) = RecordHeader::read(input)?; + while header.tag != ENDEL::tag() { + if header.tag == PROPATTR::tag() { + let key; + let value_bytes; + (input, key) = PROPATTR::read_data(input, header.data_size)?; + (input, value_bytes) = PROPVALUE::read(input)?; + + properties.push(PropertyRecord { + key, + value: String::from_utf8(value_bytes).unwrap(), + }); + } + (input, header) = RecordHeader::read(input)?; + } + + Ok((input, properties)) +} + /* /// diff --git a/src/library.rs b/src/library.rs index abe4ca4..15ae3c8 100644 --- a/src/library.rs +++ b/src/library.rs @@ -111,19 +111,43 @@ pub fn read_library(input: &[u8]) -> IResult { )); - let repetition_struct_t = DataType::Struct(Fields::from(vec![ + let sref_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("target", DataType::UInt32, false), + Field::new("invert_y", DataType::Boolean, false), + Field::new("scale", DataType::Float64, false), + Field::new("angle_rad", DataType::Float64, false), + Field::new("xy", DataType::UInt64, false), + ])); + + let aref_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("target", DataType::UInt32, false), + Field::new("invert_y", DataType::Boolean, false), + Field::new("scale", 
DataType::Float64, false), + Field::new("angle_rad", DataType::Float64, false), + Field::new("xy", DataType::UInt64, false), Field::new("xy0", DataType::UInt64, false), Field::new("xy1", DataType::UInt64, false), Field::new("counts", DataType::UInt32, false), ])); - let ref_struct_t = DataType::Struct(Fields::from(vec![ + let sref_prop_struct_t = DataType::Struct(Fields::from(vec![ Field::new("target", DataType::UInt32, false), - Field::new("invert_y", DataType::Boolean, true), - Field::new("mag", DataType::Float64, true), - Field::new("angle_deg", DataType::Float64, true), + Field::new("invert_y", DataType::Boolean, false), + Field::new("scale", DataType::Float64, false), + Field::new("angle_rad", DataType::Float64, false), Field::new("xy", DataType::UInt64, false), - Field::new("repetition", repetition_struct_t, true), + Field::new("properties", property_list_t.clone(), true), + ])); + + let aref_prop_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("target", DataType::UInt32, false), + Field::new("invert_y", DataType::Boolean, false), + Field::new("scale", DataType::Float64, false), + Field::new("angle_rad", DataType::Float64, false), + Field::new("xy", DataType::UInt64, false), + Field::new("xy0", DataType::UInt64, false), + Field::new("xy1", DataType::UInt64, false), + Field::new("counts", DataType::UInt32, false), Field::new("properties", property_list_t.clone(), true), ])); @@ -147,9 +171,24 @@ pub fn read_library(input: &[u8]) -> IResult { Field::new_list_field(DataType::Int32, false) )); - let boundary_struct_t = DataType::Struct(Fields::from(vec![ + let boundary_batch_struct_t = DataType::Struct(Fields::from(vec![ Field::new("layer", DataType::UInt32, false), - Field::new("xy", coords_t.clone(), false), + Field::new("vertices", coords_t.clone(), false), + Field::new( + "vertex_offsets", + DataType::List(Arc::new(Field::new_list_field(DataType::UInt32, false))), + false, + ), + ])); + + let rect_batch_struct_t = 
DataType::Struct(Fields::from(vec![ + Field::new("layer", DataType::UInt32, false), + Field::new("rects", coords_t.clone(), false), + ])); + + let boundary_prop_struct_t = DataType::Struct(Fields::from(vec![ + Field::new("layer", DataType::UInt32, false), + Field::new("vertices", coords_t.clone(), false), Field::new("properties", property_list_t.clone(), true), ])); @@ -169,17 +208,36 @@ pub fn read_library(input: &[u8]) -> IResult { Field::new("properties", property_list_t.clone(), true), ])); + let sref_list_t = DataType::List(Arc::new( + Field::new_list_field(sref_struct_t, false) + )); - let ref_list_t = DataType::List(Arc::new( - Field::new_list_field(ref_struct_t, false) + let aref_list_t = DataType::List(Arc::new( + Field::new_list_field(aref_struct_t, false) + )); + + let sref_prop_list_t = DataType::List(Arc::new( + Field::new_list_field(sref_prop_struct_t, false) + )); + + let aref_prop_list_t = DataType::List(Arc::new( + Field::new_list_field(aref_prop_struct_t, false) )); let text_list_t = DataType::List(Arc::new( Field::new_list_field(text_struct_t, false) )); - let boundary_list_t = DataType::List(Arc::new( - Field::new_list_field(boundary_struct_t, false) + let boundary_batch_list_t = DataType::List(Arc::new( + Field::new_list_field(boundary_batch_struct_t, false) + )); + + let rect_batch_list_t = DataType::List(Arc::new( + Field::new_list_field(rect_batch_struct_t, false) + )); + + let boundary_prop_list_t = DataType::List(Arc::new( + Field::new_list_field(boundary_prop_struct_t, false) )); let path_list_t = DataType::List(Arc::new( @@ -207,8 +265,13 @@ pub fn read_library(input: &[u8]) -> IResult { let cell_struct_t = DataType::Struct(Fields::from(vec![ Field::new("id", DataType::UInt32, false), Field::new("file_offset", DataType::UInt64, false), - Field::new("refs", ref_list_t, false), - Field::new("boundaries", boundary_list_t, false), + Field::new("srefs", sref_list_t, false), + Field::new("arefs", aref_list_t, false), + Field::new("sref_props", 
sref_prop_list_t, false), + Field::new("aref_props", aref_prop_list_t, false), + Field::new("rect_batches", rect_batch_list_t, false), + Field::new("boundary_batches", boundary_batch_list_t, false), + Field::new("boundary_props", boundary_prop_list_t, false), Field::new("paths", path_list_t, false), Field::new("nodes", boxnode_list_t.clone(), true), Field::new("boxes", boxnode_list_t.clone(), true), From d3bf15f47ae5267fe56ace576cd2c4d7eaef0b0b Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Thu, 2 Apr 2026 20:35:31 -0700 Subject: [PATCH 31/31] follow masque interface --- Cargo.toml | 2 +- src/basic.rs | 20 ++++++++++---------- src/elements.rs | 14 +++++++------- src/lib.rs | 32 ++++++++++++++++---------------- src/library.rs | 4 ++-- src/record.rs | 36 ++++++++++++++++++------------------ 6 files changed, 54 insertions(+), 54 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 87243ac..e201109 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "klamath_rs_ext" version = "0.2.0" authors = ["jan "] -edition = "2021" +edition = "2024" [lib] diff --git a/src/basic.rs b/src/basic.rs index 8bad4e7..051d0ba 100644 --- a/src/basic.rs +++ b/src/basic.rs @@ -15,15 +15,15 @@ pub enum ErrType { Failed(String), } -pub fn fail(input: &[u8], msg: String) -> IResult { +pub fn fail(input: &[u8], msg: String) -> IResult<'_, O> { Err((input, ErrType::Failed(msg))) } -pub fn incomplete(input: &[u8], size: Option) -> IResult { +pub fn incomplete(input: &[u8], size: Option) -> IResult<'_, O> { Err((input, ErrType::Incomplete(size))) } -pub fn take_bytes>(input: &[u8], count: CC) -> IResult<&[u8]> { +pub fn take_bytes>(input: &[u8], count: CC) -> IResult<'_, &[u8]> { let cc = count.into(); if input.len() < cc { incomplete(input, Some(cc)) @@ -37,19 +37,19 @@ pub fn take_bytes>(input: &[u8], count: CC) -> IResult<&[u8]> { /* * Parse functions */ -pub fn parse_u16(input: &[u8]) -> IResult { +pub fn parse_u16(input: &[u8]) -> IResult<'_, u16> { let (input, buf) = 
take_bytes(input, 2_usize)?; let val = BigEndian::read_u16(buf); Ok((input, val)) } -pub fn parse_int2(input: &[u8]) -> IResult { +pub fn parse_int2(input: &[u8]) -> IResult<'_, i16> { let (input, buf) = take_bytes(input, 2_usize)?; let val = BigEndian::read_i16(buf); Ok((input, val)) } -pub fn parse_int4(input: &[u8]) -> IResult { +pub fn parse_int4(input: &[u8]) -> IResult<'_, i32> { let (input, buf) = take_bytes(input, 4_usize)?; let val = BigEndian::read_i32(buf); Ok((input, val)) @@ -67,14 +67,14 @@ pub fn decode_real8(int: u64) -> f64 { mant * 2_f64.powi(exp2) } -pub fn parse_real8(input: &[u8]) -> IResult { +pub fn parse_real8(input: &[u8]) -> IResult<'_, f64> { let (input, buf) = take_bytes(input, 8_usize)?; let data = BigEndian::read_u64(buf); Ok((input, decode_real8(data))) } -pub fn parse_datetime(input: &[u8]) -> IResult<[i16; 6]> { +pub fn parse_datetime(input: &[u8]) -> IResult<'_, [i16; 6]> { let mut buf = [0_i16; 6]; let mut input = input; for bb in &mut buf { @@ -85,7 +85,7 @@ pub fn parse_datetime(input: &[u8]) -> IResult<[i16; 6]> { } -pub fn parse_bitarray(input: &[u8]) -> IResult<[bool; 16]> { +pub fn parse_bitarray(input: &[u8]) -> IResult<'_, [bool; 16]> { let mut bits = [false; 16]; let (input, val) = parse_int2(input)?; for (ii, bit) in bits.iter_mut().enumerate() { @@ -95,7 +95,7 @@ pub fn parse_bitarray(input: &[u8]) -> IResult<[bool; 16]> { } -pub fn parse_ascii(input: &[u8], length: u16) -> IResult> { +pub fn parse_ascii(input: &[u8], length: u16) -> IResult<'_, Vec> { let length = length as usize; let (input, data) = take_bytes(input, length)?; let last = data[length - 1]; diff --git a/src/elements.rs b/src/elements.rs index f44059c..e42e3f8 100644 --- a/src/elements.rs +++ b/src/elements.rs @@ -946,7 +946,7 @@ pub trait Element { /// Read from a stream to construct this object. /// Consumes up to (and including) the ENDEL record. 
/// - fn read(input: &[u8]) -> IResult where Self: Sized; + fn read(input: &[u8]) -> IResult<'_, Self> where Self: Sized; /// /// Write this element to a stream. @@ -993,7 +993,7 @@ pub struct Reference { } impl Element for Reference { - fn read(input: &[u8]) -> IResult { + fn read(input: &[u8]) -> IResult<'_, Self> { let mut invert_y = false; let mut mag = 1.0; let mut angle_deg = 0.0; @@ -1094,7 +1094,7 @@ pub struct Boundary { } impl Element for Boundary { - fn read(input: &[u8]) -> IResult { + fn read(input: &[u8]) -> IResult<'_, Self> { let (input, layer) = LAYER::skip_and_read(input)?; let (input, dtype) = DATATYPE::read(input)?; let (input, xy) = XY::read(input)?; @@ -1142,7 +1142,7 @@ pub struct Path { } impl Element for Path { - fn read(input: &[u8]) -> IResult { + fn read(input: &[u8]) -> IResult<'_, Self> { let mut path_type = 0; let mut width = 0; let mut bgn_ext = 0; @@ -1221,7 +1221,7 @@ pub struct GDSBox { } impl Element for GDSBox { - fn read(input: &[u8]) -> IResult { + fn read(input: &[u8]) -> IResult<'_, Self> { let (input, layer) = LAYER::skip_and_read(input)?; let (input, dtype) = BOXTYPE::read(input)?; let (input, xy) = XY::read(input)?; @@ -1260,7 +1260,7 @@ pub struct Node { } impl Element for Node { - fn read(input: &[u8]) -> IResult { + fn read(input: &[u8]) -> IResult<'_, Self> { let (input, layer) = LAYER::skip_and_read(input)?; let (input, dtype) = NODETYPE::read(input)?; let (input, xy) = XY::read(input)?; @@ -1317,7 +1317,7 @@ pub struct Text { } impl Element for Text { - fn read(input: &[u8]) -> IResult { + fn read(input: &[u8]) -> IResult<'_, Self> { let mut path_type = 0; let mut presentation = [false; 16]; let mut invert_y = false; diff --git a/src/lib.rs b/src/lib.rs index 78e359b..39081ab 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,7 +20,7 @@ use arrow::ffi::{to_ffi, FFI_ArrowArray, FFI_ArrowSchema}; use arrow::array::Array; -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn read_path( cpath: *const c_char, arr: 
*mut FFI_ArrowArray, @@ -40,7 +40,7 @@ pub unsafe extern "C" fn read_path( let input = fs::read(path).expect("File read failed"); let (_input, struct_arr) = read_library(&input).expect("Read failed"); - (*arr, *schema) = to_ffi(&struct_arr.to_data()).unwrap(); + unsafe { (*arr, *schema) = to_ffi(&struct_arr.to_data()).unwrap(); } } @@ -80,40 +80,40 @@ macro_rules! impl_i32be { } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn f64_to_i16(arr: *mut f64, size: usize) -> f64 { impl_i16be!(f64, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn f64_to_i32(arr: *mut f64, size: usize) -> f64 { impl_i32be!(f64, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn f32_to_i16(arr: *mut f32, size: usize) -> f32 { impl_i16be!(f32, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn f32_to_i32(arr: *mut f32, size: usize) -> f32 { impl_i32be!(f32, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn u64_to_i16(arr: *mut u64, size: usize) -> u64 { impl_i16be!(u64, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn u64_to_i32(arr: *mut u64, size: usize) -> u64 { impl_i32be!(u64, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn i64_to_i16(arr: *mut i64, size: usize) -> i64 { impl_i16be!(i64, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn i64_to_i32(arr: *mut i64, size: usize) -> i64 { impl_i32be!(i64, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn u32_to_i16(arr: *mut u32, size: usize) -> u32 { impl_i16be!(u32, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn u32_to_i32(arr: *mut u32, size: usize) -> u32 { impl_i32be!(u32, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn i32_to_i16(arr: *mut i32, size: usize) -> i32 { impl_i16be!(i32, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn i32_to_i32(arr: *mut i32, 
size: usize) -> i32 { impl_i32be!(i32, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn u16_to_i16(arr: *mut u16, size: usize) -> u16 { impl_i16be!(u16, arr, size) } -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn i16_to_i16(arr: *mut i16, size: usize) -> i16 { impl_i16be!(i16, arr, size) } diff --git a/src/library.rs b/src/library.rs index 15ae3c8..bf4c890 100644 --- a/src/library.rs +++ b/src/library.rs @@ -64,7 +64,7 @@ impl FileHeader { /// Returns: /// FileHeader object /// - pub fn read(input: &[u8]) -> IResult { + pub fn read(input: &[u8]) -> IResult<'_, Self> { let (input, _version) = records::HEADER::read(input)?; let (input, [mod_time, acc_time]) = records::BGNLIB::read(input)?; let (input, name) = records::LIBNAME::skip_and_read(input)?; @@ -98,7 +98,7 @@ impl FileHeader { } -pub fn read_library(input: &[u8]) -> IResult { +pub fn read_library(input: &[u8]) -> IResult<'_, StructArray> { let input_size = input.len(); let property_t = DataType::Struct(Fields::from(vec![ diff --git a/src/record.rs b/src/record.rs index 3226dbb..8bc482c 100644 --- a/src/record.rs +++ b/src/record.rs @@ -24,7 +24,7 @@ pub struct RecordHeader { } impl RecordHeader { - pub fn read(input: &[u8]) -> IResult { + pub fn read(input: &[u8]) -> IResult<'_, RecordHeader> { let (input, size) = parse_u16(input)?; let (input, tag) = parse_u16(input)?; Ok((input, RecordHeader{tag, data_size:size - 4})) @@ -50,7 +50,7 @@ pub trait RecordData { type InData : ?Sized; type ByteData : AsRef<[u8]>; - fn read(input: &[u8], size: u16) -> IResult>; + fn read(input: &[u8], size: u16) -> IResult<'_, Self::BareData<'_>>; fn pack_into(buf: &mut [u8], data: &Self::InData); //fn size(data: &Self::BareData<'_>) -> u16; fn pack(data: &Self::InData) -> Self::ByteData; @@ -72,7 +72,7 @@ pub trait Record { } } - fn read_header(input: &[u8]) -> IResult { + fn read_header(input: &[u8]) -> IResult<'_, RecordHeader> { RecordHeader::read(input) } @@ -80,7 +80,7 @@ pub trait 
Record { RecordHeader{tag: Self::tag(), data_size}.write(ww) } - fn read_data(input: &[u8], size: u16) -> IResult> { + fn read_data(input: &[u8], size: u16) -> IResult<'_, RData::BareData<'_>> { RData::read(input, size) } @@ -95,7 +95,7 @@ pub trait Record { /// True if the record was encountered and skipped. /// False if the end of the library was reached. /// - fn skip_past(input: &[u8]) -> IResult { + fn skip_past(input: &[u8]) -> IResult<'_, bool> { let original_input = input; let (mut input, mut header) = RecordHeader::read(input)?; while header.tag != Self::tag() { @@ -109,7 +109,7 @@ pub trait Record { Ok((input, true)) } - fn skip_and_read(input: &[u8]) -> IResult> { + fn skip_and_read(input: &[u8]) -> IResult<'_, RData::BareData<'_>> { let (mut input, mut header) = RecordHeader::read(input)?; while header.tag != Self::tag() { (input, _) = take_bytes(input, header.data_size)?; @@ -119,7 +119,7 @@ pub trait Record { Ok((input, data)) } - fn expect_header(input: &[u8]) -> IResult { + fn expect_header(input: &[u8]) -> IResult<'_, u16> { let (input, header) = RecordHeader::read(input)?; if header.tag != Self::tag() { fail(input, format!("Unexpected record! 
Got tag 0x{:04x}, expected 0x{:04x}", header.tag, Self::tag())) @@ -128,7 +128,7 @@ pub trait Record { } } - fn read(input: &[u8]) -> IResult> { + fn read(input: &[u8]) -> IResult<'_, RData::BareData<'_>> { let (input, size) = Self::expect_header(input)?; Self::check_size(size).unwrap(); let (input, data) = Self::read_data(input, size)?; @@ -152,7 +152,7 @@ impl RecordData for BitArray { type InData = [bool; 16]; type ByteData = [u8; 2]; - fn read(input: &[u8], size: u16) -> IResult> { + fn read(input: &[u8], size: u16) -> IResult<'_, Self::BareData<'_>> { assert!(size == 2); parse_bitarray(input) } @@ -175,7 +175,7 @@ impl RecordData for Int2 { type InData = i16; type ByteData = [u8; 2]; - fn read(input: &[u8], size: u16) -> IResult> { + fn read(input: &[u8], size: u16) -> IResult<'_, Self::BareData<'_>> { assert!(size == 2); parse_int2(input) } @@ -197,7 +197,7 @@ impl RecordData for Int4 { type InData = i32; type ByteData = [u8; 4]; - fn read(input: &[u8], size: u16) -> IResult> { + fn read(input: &[u8], size: u16) -> IResult<'_, Self::BareData<'_>> { assert!(size == 4); parse_int4(input) } @@ -220,7 +220,7 @@ impl RecordData for Int2Array { type InData = [i16]; type ByteData = Vec; - fn read(input: &[u8], size: u16) -> IResult> { + fn read(input: &[u8], size: u16) -> IResult<'_, Self::BareData<'_>> { assert!(size % 2 == 0, "Record must contain an integer quantity of integers"); //let mut input = input; let (input, bytes) = take_bytes(input, size)?; @@ -261,7 +261,7 @@ impl RecordData for Int4Array { type InData = [i32]; type ByteData = Vec; - fn read(input: &[u8], size: u16) -> IResult> { + fn read(input: &[u8], size: u16) -> IResult<'_, Self::BareData<'_>> { assert!(size % 4 == 0, "Record must contain an integer quantity of integers"); //let mut input = input; let (input, bytes) = take_bytes(input, size)?; @@ -302,7 +302,7 @@ impl RecordData for Real8 { type InData = f64; type ByteData = [u8; 8]; - fn read(input: &[u8], size: u16) -> IResult> { + fn 
read(input: &[u8], size: u16) -> IResult<'_, Self::BareData<'_>> { assert!(size == 8); parse_real8(input) } @@ -324,7 +324,7 @@ impl RecordData for Real8Pair { type InData = (f64, f64); type ByteData = [u8; 2 * 8]; - fn read(input: &[u8], size: u16) -> IResult> { + fn read(input: &[u8], size: u16) -> IResult<'_, Self::BareData<'_>> { assert!(size == 2 * 8); let (input, data0) = parse_real8(input)?; let (input, data1) = parse_real8(input)?; @@ -354,7 +354,7 @@ impl RecordData for ASCII { type InData = [u8]; type ByteData = Vec; - fn read(input: &[u8], size: u16) -> IResult> { + fn read(input: &[u8], size: u16) -> IResult<'_, Self::BareData<'_>> { parse_ascii(input, size) } @@ -376,7 +376,7 @@ impl RecordData for DateTimePair { type InData = [[i16; 6]; 2]; type ByteData = [u8; 2 * 6 * 2]; - fn read(input: &[u8], size: u16) -> IResult> { + fn read(input: &[u8], size: u16) -> IResult<'_, Self::BareData<'_>> { assert!(size == 2 * 6 * 2); let (input, data0) = parse_datetime(input)?; let (input, data1) = parse_datetime(input)?; @@ -406,7 +406,7 @@ impl RecordData for Empty { type InData = (); type ByteData = [u8; 0]; - fn read(input: &[u8], size: u16) -> IResult> { + fn read(input: &[u8], size: u16) -> IResult<'_, Self::BareData<'_>> { assert!(size == 0); Ok((input, ())) }