diff --git a/Cargo.toml b/Cargo.toml index c7deda4..aa0b4a8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,4 +12,4 @@ crate-type = ["cdylib", "rlib"] [dependencies] byteorder = "^1" -arrow = "^54" +arrow = {version = "^54", features = ["ffi"]} diff --git a/src/elements.rs b/src/elements.rs index 3e41115..ee6e7e8 100644 --- a/src/elements.rs +++ b/src/elements.rs @@ -20,35 +20,54 @@ use std::collections::HashMap; //use std::io::Write; use arrow::array::{ - StructBuilder, ListBuilder, StringBuilder, ArrayBuilder, Float64Builder, BooleanBuilder, - Int32Builder, Int16Builder, UInt32Builder, UInt8Builder, + StructBuilder, FixedSizeListBuilder, ListBuilder, StringBuilder, ArrayBuilder, BooleanBuilder, + Int32Builder, Int16Builder, UInt32Builder, UInt8Builder, Float64Builder, }; pub type DListBuilder = ListBuilder>; +pub type FListBuilder = FixedSizeListBuilder>; pub fn read_elements<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, names: &mut HashMap) -> IResult<'a, ()> { - let (mut input, mut header) = RecordHeader::read(input)?; + let mut input = input; + let (_, mut header) = RecordHeader::read(input)?; // don't consume tag while header.tag != records::RTAG_ENDSTR { - match header.tag { - records::RTAG_BOUNDARY => {(input, _) = read_boundary(input, cell_builder)?;}, - records::RTAG_PATH => {read_path(input, cell_builder)?;}, - records::RTAG_NODE => {read_boxnode(input, cell_builder, header.tag)?;}, - records::RTAG_BOX => {read_boxnode(input, cell_builder, header.tag)?;}, - records::RTAG_TEXT => {read_text(input, cell_builder)?;}, - records::RTAG_SREF => {read_ref(input, cell_builder, header.tag, names)?;}, - records::RTAG_AREF => {read_ref(input, cell_builder, header.tag, names)?;}, + (input, _) = match header.tag { + records::RTAG_SREF => read_ref(input, cell_builder, header.tag, names)?, + records::RTAG_AREF => read_ref(input, cell_builder, header.tag, names)?, + records::RTAG_BOUNDARY => read_boundary(input, cell_builder)?, + records::RTAG_PATH => read_path(input, cell_builder)?, + records::RTAG_NODE => read_boxnode(input, cell_builder, header.tag)?, + records::RTAG_BOX => read_boxnode(input, cell_builder, header.tag)?, + records::RTAG_TEXT => read_text(input, cell_builder)?, _ => { // don't care, skip - (input, _) = take_bytes(input, header.data_size)?; + let result = take_bytes(input, header.data_size + 4)?; + (result.0, ()) } - } - (input, header) = RecordHeader::read(input)?; + }; + (_, header) = RecordHeader::read(input)?; // don't consume tag } + (input, _) = take_bytes(input, 4_usize)?; // consume endstr tag + + let refs_builder = cell_builder.field_builder::(2).unwrap(); + refs_builder.append(true); + let boundaries_builder = cell_builder.field_builder::(3).unwrap(); + boundaries_builder.append(true); + let paths_builder = cell_builder.field_builder::(4).unwrap(); + paths_builder.append(true); + let nodes_builder = cell_builder.field_builder::(5).unwrap(); + nodes_builder.append(true); + let boxes_builder = cell_builder.field_builder::(6).unwrap(); + boxes_builder.append(true); + let texts_builder = cell_builder.field_builder::(7).unwrap(); + texts_builder.append(true); + Ok((input, ())) } + pub fn read_boundary<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResult<'a, ()> { let boundaries_builder = cell_builder.field_builder::(3).unwrap(); let boundary_builder = boundaries_builder.values().as_any_mut().downcast_mut::().unwrap(); @@ -75,7 +94,6 @@ pub fn read_boundary<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> I let (input, ()) = read_properties(input, props_builder)?; boundary_builder.append(true); - boundaries_builder.append(true); Ok((input, ())) } @@ -84,7 +102,6 @@ pub fn read_path<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu let paths_builder = cell_builder.field_builder::(4).unwrap(); let path_builder = paths_builder.values().as_any_mut().downcast_mut::().unwrap(); - let (input, _) = records::PATH::read(input)?; let (input, layer) = LAYER::skip_and_read(input)?; @@ -137,7 +154,7 @@ pub fn read_path<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu width_builder.append_option(width); let xys_builder = path_builder.field_builder::(6).unwrap(); - let (input, xy_iter) = XY::read(input)?; + let (input, xy_iter) = XY::read_data(input, header.data_size)?; for xy in xy_iter { let xy_builder = xys_builder.values().as_any_mut().downcast_mut::().unwrap(); xy_builder.append_value(xy); @@ -148,7 +165,6 @@ pub fn read_path<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu let (input, ()) = read_properties(input, props_builder)?; path_builder.append(true); - paths_builder.append(true); Ok((input, ())) } @@ -192,7 +208,6 @@ pub fn read_boxnode<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag: let (input, ()) = read_properties(input, props_builder)?; boxnode_builder.append(true); - boxnodes_builder.append(true); Ok((input, ())) } @@ -200,7 +215,6 @@ pub fn read_text<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu let texts_builder = cell_builder.field_builder::(7).unwrap(); let text_builder = texts_builder.values().as_any_mut().downcast_mut::().unwrap(); - let mut path_type = None; let mut pres_hori = None; let mut pres_vert = None; @@ -277,7 +291,7 @@ pub fn read_text<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu let angle_builder = text_builder.field_builder::(9).unwrap(); angle_builder.append_option(angle_deg); - let (input, mut xy_iter) = XY::read(input)?; + let (input, mut xy_iter) = XY::read_data(input, header.data_size)?; let x_builder = text_builder.field_builder::(10).unwrap(); x_builder.append_value(xy_iter.next().unwrap()); let y_builder = text_builder.field_builder::(11).unwrap(); @@ -288,19 +302,24 @@ pub fn read_text<'a>(input: &'a [u8], cell_builder: &mut StructBuilder) -> IResu let string_builder = text_builder.field_builder::(12).unwrap(); string_builder.append_value(string); - let props_builder = text_builder.field_builder::(13).unwrap(); + let props_builder = text_builder.field_builder::(13).unwrap(); let (input, ()) = read_properties(input, props_builder)?; text_builder.append(true); - texts_builder.append(true); Ok((input, ())) } pub fn read_ref<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag: u16, names: &mut HashMap) -> IResult<'a, ()> { + let (input, _) = match tag { + records::RTAG_SREF => records::SREF::read(input)?, + records::RTAG_AREF => records::AREF::read(input)?, + _ => return fail(input, format!("Unexpected tag {:04x}", tag)), + }; + let is_aref = tag == records::RTAG_AREF; - let refs_builder = cell_builder.field_builder::(7).unwrap(); + let refs_builder = cell_builder.field_builder::(2).unwrap(); let ref_builder = refs_builder.values().as_any_mut().downcast_mut::().unwrap(); let mut invert_y = None; @@ -361,6 +380,7 @@ pub fn read_ref<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag: u16, y_builder.append_value(xy_iter.next().unwrap()); let rep_builder = ref_builder.field_builder::(6).unwrap(); + println!("ref, {is_aref:?}"); if is_aref { let x0_builder = rep_builder.field_builder::(0).unwrap(); x0_builder.append_value(xy_iter.next().unwrap()); @@ -380,6 +400,19 @@ pub fn read_ref<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag: u16, count1_builder.append_value(count1); }, } + } else { + let x0_builder = rep_builder.field_builder::(0).unwrap(); + x0_builder.append_null(); + let y0_builder = rep_builder.field_builder::(1).unwrap(); + y0_builder.append_null(); + let x1_builder = rep_builder.field_builder::(2).unwrap(); + x1_builder.append_null(); + let y1_builder = rep_builder.field_builder::(3).unwrap(); + y1_builder.append_null(); + let count0_builder = rep_builder.field_builder::(4).unwrap(); + count0_builder.append_null(); + let count1_builder = rep_builder.field_builder::(5).unwrap(); + count1_builder.append_null(); } rep_builder.append(is_aref); @@ -387,7 +420,6 @@ pub fn read_ref<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag: u16, let (input, ()) = read_properties(input, props_builder)?; ref_builder.append(true); - refs_builder.append(true); Ok((input, ())) } @@ -405,7 +437,6 @@ pub fn read_ref<'a>(input: &'a [u8], cell_builder: &mut StructBuilder, tag: u16, /// propattr: -> propvalue mapping /// pub fn read_properties<'a>(input: &'a [u8], props_builder: &mut DListBuilder) -> IResult<'a, ()> { - let prop_builder = props_builder.values().as_any_mut().downcast_mut::().unwrap(); let (mut input, mut header) = RecordHeader::read(input)?; while header.tag != ENDEL::tag() { @@ -418,6 +449,7 @@ pub fn read_properties<'a>(input: &'a [u8], props_builder: &mut DListBuilder) -> let value = String::from_utf8(value_bytes).unwrap(); //assert!(!properties.contains_key(&key), "Duplicate property key: {}", key); + let prop_builder = props_builder.values().as_any_mut().downcast_mut::().unwrap(); let key_builder = prop_builder.field_builder::(0).unwrap(); key_builder.append_value(key); diff --git a/src/lib.rs b/src/lib.rs index 0f8c365..33b7598 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,9 +6,44 @@ pub mod records; pub mod elements; pub mod library; +use crate::library::read_library; use byteorder::{ByteOrder, BigEndian}; use std::mem::size_of; +use std::ffi::{CStr, OsStr, c_char}; +use std::os::unix::ffi::OsStrExt; +use std::str; +use std::fs; +use std::path::Path; + +use arrow::ffi::{to_ffi, FFI_ArrowArray, FFI_ArrowSchema}; +use arrow::array::Array; + + +#[no_mangle] +pub unsafe extern "C" fn read_path( + cpath: *const c_char, + arr: *mut FFI_ArrowArray, + schema: *mut FFI_ArrowSchema, + ) { + let cstr = unsafe { CStr::from_ptr(cpath) }; + let path: &Path; + if cfg!(unix) { + let osstr = OsStr::from_bytes(cstr.to_bytes()); + path = osstr.as_ref(); + } else if cfg!(windows) { + let ustr = str::from_utf8(cstr.to_bytes()).expect("Non-UTF8 paths are not supported"); + path = ustr.as_ref(); + } else { + panic!("Unsupported OS"); + } + + let input = fs::read(path).expect("File read failed"); + let (_input, struct_arr) = read_library(&input).expect("Read failed"); + let (mut arr_v, mut schema_v) = to_ffi(&struct_arr.to_data()).unwrap(); + *arr = arr_v; + *schema = schema_v; +} macro_rules! impl_i16be { diff --git a/src/library.rs b/src/library.rs index bad6229..c2c4e13 100644 --- a/src/library.rs +++ b/src/library.rs @@ -9,7 +9,7 @@ pub use crate::record; pub use crate::record::{RecordHeader, Record}; pub use crate::records; pub use crate::elements; -pub use crate::elements::{read_elements, DListBuilder}; +pub use crate::elements::{read_elements, DListBuilder, FListBuilder}; pub use crate::basic::{IResult, OResult, take_bytes, fail}; use std::string::String; @@ -19,7 +19,7 @@ use std::sync::Arc; use arrow::datatypes::{DataType, Field, Fields}; use arrow::array::{ StructBuilder, StringBuilder, UInt64Builder, UInt32Builder, Int16Builder, Float64Builder, - FixedSizeListBuilder, StructArray, + StructArray, }; @@ -134,8 +134,8 @@ pub fn read_library(input: &[u8]) -> IResult { let text_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("layer", DataType::UInt16, false), - Field::new("dtype", DataType::UInt16, false), + Field::new("layer", DataType::Int16, false), + Field::new("dtype", DataType::Int16, false), Field::new("presentation_horiz", DataType::UInt8, true), Field::new("presentation_vert", DataType::UInt8, true), Field::new("presentation_font", DataType::UInt8, true), @@ -156,16 +156,16 @@ pub fn read_library(input: &[u8]) -> IResult { )); let boundary_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("layer", DataType::UInt16, false), - Field::new("dtype", DataType::UInt16, false), + Field::new("layer", DataType::Int16, false), + Field::new("dtype", DataType::Int16, false), Field::new("xy", coords_t.clone(), false), Field::new("properties", property_list_t.clone(), true), ])); let path_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("layer", DataType::UInt16, false), - Field::new("dtype", DataType::UInt16, false), - Field::new("path_type", DataType::Int16, false), + Field::new("layer", DataType::Int16, false), + Field::new("dtype", DataType::Int16, false), + Field::new("path_type", DataType::Int16, true), Field::new("extension_start", DataType::Int32, true), Field::new("extension_end", DataType::Int32, true), Field::new("width", DataType::Int32, false), @@ -174,8 +174,8 @@ pub fn read_library(input: &[u8]) -> IResult { ])); let boxnode_struct_t = DataType::Struct(Fields::from(vec![ - Field::new("layer", DataType::UInt16, false), - Field::new("dtype", DataType::UInt16, false), + Field::new("layer", DataType::Int16, false), + Field::new("dtype", DataType::Int16, false), Field::new("xy", coords_t.clone(), false), Field::new("properties", property_list_t.clone(), true), ])); @@ -222,6 +222,10 @@ pub fn read_library(input: &[u8]) -> IResult { Field::new("texts", text_list_t, false), ])); + let cells_list_t = DataType::List(Arc::new( + Field::new_list_field(cell_struct_t, false) + )); + let mut lib_builder = StructBuilder::from_fields(vec![ Field::new("meters_per_db_unit", DataType::Float64, false), Field::new("user_units_per_db_unit", DataType::Float64, false), @@ -229,7 +233,7 @@ pub fn read_library(input: &[u8]) -> IResult { Field::new("mod_time", time_t.clone(), false), Field::new("acc_time", time_t.clone(), false), Field::new("cell_names", name_list_t, false), - Field::new("cells", cell_struct_t, false), + Field::new("cells", cells_list_t, false), ], 0, ); @@ -241,17 +245,17 @@ pub fn read_library(input: &[u8]) -> IResult { uu_builder.append_value(header.user_units_per_db_unit); let libname_builder = lib_builder.field_builder::(2).unwrap(); libname_builder.append_value(String::from_utf8(header.name).unwrap()); - let mt_builder = lib_builder.field_builder::>(3).unwrap(); - mt_builder.values().append_values(&header.mod_time, &[true; 6]); - let at_builder = lib_builder.field_builder::>(4).unwrap(); - at_builder.values().append_values(&header.acc_time, &[true; 6]); - - - - let cells_builder = lib_builder.field_builder::(5).unwrap(); - + let mtl_builder = lib_builder.field_builder::(3).unwrap(); + let mt_builder = mtl_builder.values().as_any_mut().downcast_mut::().unwrap(); + mt_builder.append_values(&header.mod_time, &[true; 6]); + mtl_builder.append(true); + let atl_builder = lib_builder.field_builder::(4).unwrap(); + let at_builder = atl_builder.values().as_any_mut().downcast_mut::().unwrap(); + at_builder.append_values(&header.acc_time, &[true; 6]); + atl_builder.append(true); let mut names = HashMap::::new(); + let cells_builder = lib_builder.field_builder::(6).unwrap(); let (mut input, mut header) = RecordHeader::read(input)?; while header.tag != records::RTAG_ENDLIB { @@ -260,6 +264,7 @@ pub fn read_library(input: &[u8]) -> IResult { let name_bytes; (input, name_bytes) = records::STRNAME::read(input)?; let name = String::from_utf8(name_bytes).unwrap(); + println!("{name}"); let next_id = names.len(); let id = names.entry(name).or_insert(next_id.try_into().unwrap()); @@ -274,16 +279,19 @@ pub fn read_library(input: &[u8]) -> IResult { (input, _) = read_elements(input, cell_builder, &mut names)?; - cells_builder.append(true); - } + cell_builder.append(true); + } (input, header) = RecordHeader::read(input)?; } + cells_builder.append(true); let mut ids: HashMap = names.into_iter().map(|(kk, vv)| (vv, kk)).collect(); - let names_builder = lib_builder.field_builder::(6).unwrap(); + let names_builder = lib_builder.field_builder::(5).unwrap(); + let name_builder = names_builder.values().as_any_mut().downcast_mut::().unwrap(); for id in 0..ids.len() { - names_builder.append_value(ids.remove(&id.try_into().unwrap()).unwrap()); + name_builder.append_value(ids.remove(&id.try_into().unwrap()).unwrap()); } + names_builder.append(true); lib_builder.append(true); let lib = lib_builder.finish(); diff --git a/src/misc.py b/src/misc.py index ab4c9bf..1e507c7 100644 --- a/src/misc.py +++ b/src/misc.py @@ -19,7 +19,6 @@ https://github.com/apache/arrow/blob/main/python/pyarrow/tests/test_cffi.py # specific language governing permissions and limitations # under the License. -import contextlib import ctypes import pyarrow as pa from pyarrow.cffi import ffi diff --git a/src/record.rs b/src/record.rs index 8d106c9..3226dbb 100644 --- a/src/record.rs +++ b/src/record.rs @@ -17,6 +17,7 @@ use crate::records; #[repr(C)] +#[derive(Debug, Clone)] pub struct RecordHeader { pub tag: u16, pub data_size: u16, diff --git a/src/records.rs b/src/records.rs index 4fe21fa..5a7e2cc 100644 --- a/src/records.rs +++ b/src/records.rs @@ -112,7 +112,7 @@ impl Record for HEADER { pub struct BGNLIB; impl Record for BGNLIB { fn tag() -> u16 { RTAG_BGNLIB } - fn expected_size() -> Option { Some(2 * 6) } + fn expected_size() -> Option { Some(2 * 2 * 6) } } pub struct LIBNAME; @@ -143,7 +143,7 @@ impl Record for BGNSTR { pub struct STRNAME; impl Record for STRNAME { fn tag() -> u16 { RTAG_STRNAME } - fn expected_size() -> Option { Some(2 * 6) } + fn expected_size() -> Option { None } } pub struct ENDSTR; diff --git a/test.py b/test.py new file mode 100644 index 0000000..998dff1 --- /dev/null +++ b/test.py @@ -0,0 +1,35 @@ +import ctypes +import pyarrow +from pyarrow.cffi import ffi + +#c_schema = ffi.new('struct ArrowSchema*') +#c_array = ffi.new('struct ArrowArray*') +#ptr_schema = int(ffi.cast('uintptr_t', c_schema)) +#ptr_array = int(ffi.cast('uintptr_t', c_array)) + + +path = '/home/jan/projects/masque/test.gds' + +#clib = ctypes.CDLL('./libklamath_rs_ext.so') +clib = ffi.dlopen('./libklamath_rs_ext.so') + + +ret_ptr_array = ffi.new('struct ArrowArray[]', 1) +ret_ptr_schema = ffi.new('struct ArrowSchema[]', 1) +ffi.cdef('void read_path(char* path, struct ArrowArray* array, struct ArrowSchema* schema);') +print(f'{ret_ptr_array[0]=}, {ret_ptr_schema[0]=}') + +clib.read_path(path.encode(), ret_ptr_array, ret_ptr_schema) + +ptr_schema = int(ffi.cast('uintptr_t', ret_ptr_schema)) +ptr_array = int(ffi.cast('uintptr_t', ret_ptr_array)) + + +print(f'{ret_ptr_array[0]=}, {ret_ptr_schema[0]=}') +print(f'python {ptr_array=:x} {ptr_schema=:x}') +#print(f'{ret_ptr_array[0].buffers=} {ret_ptr_schema[0][0]=}') + +arr_new = pyarrow.Array._import_from_c(ptr_array, ptr_schema) + + +