From 09fec67a2173e9d777c00672bb26fa4b9898a691 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Thu, 2 Apr 2026 13:21:59 -0700 Subject: [PATCH] [gdsii_arrow] misc correctness work --- masque/file/gdsii_arrow.py | 173 +++++++++++++++++++++++------- masque/test/test_gdsii_arrow.py | 180 ++++++++++++++++++++++++++++++++ 2 files changed, 316 insertions(+), 37 deletions(-) create mode 100644 masque/test/test_gdsii_arrow.py diff --git a/masque/file/gdsii_arrow.py b/masque/file/gdsii_arrow.py index e56a48e..546d575 100644 --- a/masque/file/gdsii_arrow.py +++ b/masque/file/gdsii_arrow.py @@ -25,12 +25,16 @@ Notes: """ from typing import IO, cast, Any from collections.abc import Iterable, Mapping, Callable +from importlib.machinery import EXTENSION_SUFFIXES +import importlib.util import io import mmap import logging +import os import pathlib import gzip import string +import sys from pprint import pformat import numpy @@ -49,9 +53,10 @@ from ..library import LazyLibrary, Library, ILibrary, ILibraryView logger = logging.getLogger(__name__) -clib = ffi.dlopen('/home/jan/projects/klamath-rs/target/release/libklamath_rs_ext.so') ffi.cdef('void read_path(char* path, struct ArrowArray* array, struct ArrowSchema* schema);') +clib: Any | None = None + path_cap_map = { 0: Path.Cap.Flush, @@ -65,16 +70,123 @@ def rint_cast(val: ArrayLike) -> NDArray[numpy.int32]: return numpy.rint(val).astype(numpy.int32) +def _packed_layer_u32_to_pairs(values: NDArray[numpy.unsignedinteger[Any]]) -> NDArray[numpy.int16]: + layer = (values >> numpy.uint32(16)).astype(numpy.uint16).view(numpy.int16) + dtype = (values & numpy.uint32(0xffff)).astype(numpy.uint16).view(numpy.int16) + return numpy.stack((layer, dtype), axis=-1) + + +def _packed_counts_u32_to_pairs(values: NDArray[numpy.unsignedinteger[Any]]) -> NDArray[numpy.int64]: + a_count = (values >> numpy.uint32(16)).astype(numpy.uint16).astype(numpy.int64) + b_count = (values & numpy.uint32(0xffff)).astype(numpy.uint16).astype(numpy.int64) + return numpy.stack((a_count, b_count), axis=-1) + + +def _packed_xy_u64_to_pairs(values: NDArray[numpy.unsignedinteger[Any]]) -> NDArray[numpy.int32]: + xx = (values >> numpy.uint64(32)).astype(numpy.uint32).view(numpy.int32) + yy = (values & numpy.uint64(0xffff_ffff)).astype(numpy.uint32).view(numpy.int32) + return numpy.stack((xx, yy), axis=-1) + + +def _local_library_filename() -> str: + if sys.platform.startswith('linux'): + return 'libklamath_rs_ext.so' + if sys.platform == 'darwin': + return 'libklamath_rs_ext.dylib' + if sys.platform == 'win32': + return 'klamath_rs_ext.dll' + raise OSError(f'Unsupported platform for klamath_rs_ext: {sys.platform!r}') + + +def _installed_library_candidates() -> list[pathlib.Path]: + candidates: list[pathlib.Path] = [] + + try: + spec = importlib.util.find_spec('klamath_rs_ext.klamath_rs_ext') + except ModuleNotFoundError: + spec = None + if spec is not None and spec.origin is not None: + candidates.append(pathlib.Path(spec.origin)) + + try: + pkg_spec = importlib.util.find_spec('klamath_rs_ext') + except ModuleNotFoundError: + pkg_spec = None + if pkg_spec is not None and pkg_spec.submodule_search_locations is not None: + for location in pkg_spec.submodule_search_locations: + pkg_dir = pathlib.Path(location) + for suffix in EXTENSION_SUFFIXES: + candidates.extend(sorted(pkg_dir.glob(f'klamath_rs_ext*{suffix}'))) + + return candidates + + +def _repo_library_candidates() -> list[pathlib.Path]: + repo_root = pathlib.Path(__file__).resolve().parents[2] + library_name = _local_library_filename() + return [ + repo_root / 'klamath-rs' / 'target' / 'release' / library_name, + repo_root / 'klamath-rs' / 'target' / 'debug' / library_name, + ] + + +def find_klamath_rs_library() -> pathlib.Path | None: + env_path = os.environ.get('KLAMATH_RS_EXT_LIB') + if env_path: + candidate = pathlib.Path(env_path).expanduser() + if candidate.exists(): + return candidate.resolve() + + seen: set[pathlib.Path] = set() + for candidate in _installed_library_candidates() + _repo_library_candidates(): + resolved = candidate.expanduser() + if resolved in seen: + continue + seen.add(resolved) + if resolved.exists(): + return resolved.resolve() + return None + + +def is_available() -> bool: + return find_klamath_rs_library() is not None + + +def _get_clib() -> Any: + global clib + if clib is None: + lib_path = find_klamath_rs_library() + if lib_path is None: + raise ImportError( + 'Could not locate klamath_rs_ext shared library. ' + 'Build klamath-rs with `cargo build --release --manifest-path klamath-rs/Cargo.toml` ' + 'or set KLAMATH_RS_EXT_LIB to the built library path.' + ) + clib = ffi.dlopen(str(lib_path)) + return clib + + +def _read_annotations( + prop_offs: NDArray[numpy.integer[Any]], + prop_key: NDArray[numpy.integer[Any]], + prop_val: list[str], + ee: int, + ) -> annotations_t: + prop_ii, prop_ff = prop_offs[ee], prop_offs[ee + 1] + if prop_ii >= prop_ff: + return None + return {str(prop_key[off]): [prop_val[off]] for off in range(prop_ii, prop_ff)} + + def _read_to_arrow( filename: str | pathlib.Path, *args, **kwargs, ) -> pyarrow.Array: - path = pathlib.Path(filename) - path.resolve() + path = pathlib.Path(filename).expanduser().resolve() ptr_array = ffi.new('struct ArrowArray[]', 1) ptr_schema = ffi.new('struct ArrowSchema[]', 1) - clib.read_path(str(path).encode(), ptr_array, ptr_schema) + _get_clib().read_path(str(path).encode(), ptr_array, ptr_schema) iptr_schema = int(ffi.cast('uintptr_t', ptr_schema)) iptr_array = int(ffi.cast('uintptr_t', ptr_array)) @@ -132,7 +244,7 @@ def read_arrow( """ library_info = _read_header(libarr) - layer_names_np = libarr['layers'].values.to_numpy().view('i2').reshape((-1, 2)) + layer_names_np = _packed_layer_u32_to_pairs(libarr['layers'].values.to_numpy()) layer_tups = [tuple(pair) for pair in layer_names_np] cell_ids = libarr['cells'].values.field('id').to_numpy() @@ -155,14 +267,14 @@ def read_arrow( refs = dict( offsets = rf.offsets.to_numpy(), targets = rf.values.field('target').to_numpy(), - xy = rf.values.field('xy').to_numpy().view('i4').reshape((-1, 2)), + xy = _packed_xy_u64_to_pairs(rf.values.field('xy').to_numpy()), invert_y = rf.values.field('invert_y').fill_null(False).to_numpy(zero_copy_only=False), - angle_rad = numpy.rad2deg(rf.values.field('angle_deg').fill_null(0).to_numpy()), + angle_rad = numpy.deg2rad(rf.values.field('angle_deg').fill_null(0).to_numpy()), scale = rf.values.field('mag').fill_null(1).to_numpy(), rep_valid = rf.values.field('repetition').is_valid().to_numpy(zero_copy_only=False), - rep_xy0 = rf.values.field('repetition').field('xy0').fill_null(0).to_numpy().view('i4').reshape((-1, 2)), - rep_xy1 = rf.values.field('repetition').field('xy1').fill_null(0).to_numpy().view('i4').reshape((-1, 2)), - rep_counts = rf.values.field('repetition').field('counts').fill_null(0).to_numpy().view('i2').reshape((-1, 2)), + rep_xy0 = _packed_xy_u64_to_pairs(rf.values.field('repetition').field('xy0').fill_null(0).to_numpy()), + rep_xy1 = _packed_xy_u64_to_pairs(rf.values.field('repetition').field('xy1').fill_null(0).to_numpy()), + rep_counts = _packed_counts_u32_to_pairs(rf.values.field('repetition').field('counts').fill_null(0).to_numpy()), prop_off = rf.values.field('properties').offsets.to_numpy(), prop_key = rf.values.field('properties').values.field('key').to_numpy(), prop_val = rf.values.field('properties').values.field('value').to_pylist(), @@ -172,7 +284,7 @@ def read_arrow( texts = dict( offsets = txt.offsets.to_numpy(), layer_inds = txt.values.field('layer').to_numpy(), - xy = txt.values.field('xy').to_numpy().view('i4').reshape((-1, 2)), + xy = _packed_xy_u64_to_pairs(txt.values.field('xy').to_numpy()), string = txt.values.field('string').to_pylist(), prop_off = txt.values.field('properties').offsets.to_numpy(), prop_key = txt.values.field('properties').values.field('key').to_numpy(), @@ -222,9 +334,9 @@ def _read_header(libarr: pyarrow.Array) -> dict[str, Any]: Read the file header and create the library_info dict. """ library_info = dict( - name = libarr['lib_name'], - meters_per_unit = libarr['meters_per_db_unit'], - logical_units_per_unit = libarr['user_units_per_db_unit'], + name = libarr['lib_name'].as_py(), + meters_per_unit = libarr['meters_per_db_unit'].as_py(), + logical_units_per_unit = libarr['user_units_per_db_unit'].as_py(), ) return library_info @@ -245,6 +357,8 @@ def _grefs_to_mrefs( elem_count = elem_off[cc + 1] - elem_off[cc] elem_slc = slice(elem_off[cc], elem_off[cc] + elem_count + 1) # +1 to capture ending location for last elem prop_offs = elem['prop_off'][elem_slc] # which props belong to each element + elem_targets = targets[elem_slc][:elem_count] + elem_xy = xy[elem_slc][:elem_count] elem_invert_y = elem['invert_y'][elem_slc][:elem_count] elem_angle_rad = elem['angle_rad'][elem_slc][:elem_count] elem_scale = elem['scale'][elem_slc][:elem_count] @@ -255,8 +369,8 @@ def _grefs_to_mrefs( for ee in range(elem_count): - target = cell_names[targets[ee]] - offset = xy[ee] + target = cell_names[elem_targets[ee]] + offset = elem_xy[ee] mirr = elem_invert_y[ee] rot = elem_angle_rad[ee] mag = elem_scale[ee] @@ -268,11 +382,7 @@ def _grefs_to_mrefs( a_count, b_count = elem_rep_counts[ee] rep = Grid(a_vector=a_vector, b_vector=b_vector, a_count=a_count, b_count=b_count) - annotations: None | dict[str, list[int | float | str]] = None - prop_ii, prop_ff = prop_offs[ee], prop_offs[ee + 1] - if prop_ii < prop_ff: - annotations = {str(prop_key[off]): [prop_val[off]] for off in range(prop_ii, prop_ff)} - + annotations = _read_annotations(prop_offs, prop_key, prop_val, ee) ref = Ref(offset=offset, mirrored=mirr, rotation=rot, scale=mag, repetition=rep, annotations=annotations) pat.refs[target].append(ref) @@ -293,19 +403,16 @@ def _texts_to_labels( elem_count = elem_off[cc + 1] - elem_off[cc] elem_slc = slice(elem_off[cc], elem_off[cc] + elem_count + 1) # +1 to capture ending location for last elem prop_offs = elem['prop_off'][elem_slc] # which props belong to each element + elem_xy = xy[elem_slc][:elem_count] elem_layer_inds = layer_inds[elem_slc][:elem_count] elem_strings = elem['string'][elem_slc][:elem_count] for ee in range(elem_count): layer = layer_tups[elem_layer_inds[ee]] - offset = xy[ee] + offset = elem_xy[ee] string = elem_strings[ee] - annotations: None | dict[str, list[int | float | str]] = None - prop_ii, prop_ff = prop_offs[ee], prop_offs[ee + 1] - if prop_ii < prop_ff: - annotations = {str(prop_key[off]): [prop_val[off]] for off in range(prop_ii, prop_ff)} - + annotations = _read_annotations(prop_offs, prop_key, prop_val, ee) mlabel = Label(string=string, offset=offset, annotations=annotations) pat.labels[layer].append(mlabel) @@ -345,11 +452,7 @@ def _gpaths_to_mpaths( else: cap_extensions = None - annotations: None | dict[str, list[int | float | str]] = None - prop_ii, prop_ff = prop_offs[ee], prop_offs[ee + 1] - if prop_ii < prop_ff: - annotations = {str(prop_key[off]): [prop_val[off]] for off in range(prop_ii, prop_ff)} - + annotations = _read_annotations(prop_offs, prop_key, prop_val, ee) path = Path(vertices=vertices, offset=zeros[ee], annotations=annotations, raw=raw_mode, width=width, cap=cap,cap_extensions=cap_extensions) pat.shapes[layer].append(path) @@ -406,11 +509,7 @@ def _boundaries_to_polygons( layer = layer_tups[elem_layer_inds[ee]] vertices = xy_val[xy_offs[ee]:xy_offs[ee + 1] - 1] # -1 to drop closing point - annotations: None | dict[str, list[int | float | str]] = None - prop_ii, prop_ff = prop_offs[ee], prop_offs[ee + 1] - if prop_ii < prop_ff: - annotations = {str(prop_key[off]): prop_val[off] for off in range(prop_ii, prop_ff)} - + annotations = _read_annotations(prop_offs, prop_key, prop_val, ee) poly = Polygon(vertices=vertices, offset=zeros[ee], annotations=annotations, raw=raw_mode) pat.shapes[layer].append(poly) diff --git a/masque/test/test_gdsii_arrow.py b/masque/test/test_gdsii_arrow.py new file mode 100644 index 0000000..26713e9 --- /dev/null +++ b/masque/test/test_gdsii_arrow.py @@ -0,0 +1,180 @@ +from pathlib import Path + +import numpy +import pytest + +pytest.importorskip('pyarrow') + +from ..library import Library +from ..pattern import Pattern +from ..repetition import Grid +from ..shapes import Path as MPath +from ..file import gdsii, gdsii_arrow +from ..file.gdsii_perf import write_fixture + + +if not gdsii_arrow.is_available(): + pytest.skip('klamath_rs_ext shared library is not available', allow_module_level=True) + + +def _annotations_key(annotations: dict[str, list[object]] | None) -> tuple[tuple[str, tuple[object, ...]], ...] | None: + if annotations is None: + return None + return tuple(sorted((key, tuple(values)) for key, values in annotations.items())) + + +def _coord_key(values: object) -> tuple[int, ...] | tuple[tuple[int, int], ...]: + arr = numpy.rint(numpy.asarray(values, dtype=float)).astype(int) + if arr.ndim == 1: + return tuple(arr.tolist()) + return tuple(tuple(row.tolist()) for row in arr) + + +def _shape_key(shape: object, layer: tuple[int, int]) -> list[tuple[object, ...]]: + if isinstance(shape, MPath): + cap_extensions = None if shape.cap_extensions is None else _coord_key(shape.cap_extensions) + return [( + 'path', + layer, + _coord_key(shape.vertices), + _coord_key(shape.offset), + int(round(float(shape.width))), + shape.cap.name, + cap_extensions, + _annotations_key(shape.annotations), + )] + + keys = [] + for poly in shape.to_polygons(): + keys.append(( + 'polygon', + layer, + _coord_key(poly.vertices), + _coord_key(poly.offset), + _annotations_key(poly.annotations), + )) + return keys + + +def _ref_key(target: str, ref: object) -> tuple[object, ...]: + repetition = None + if ref.repetition is not None: + repetition = ( + _coord_key(ref.repetition.a_vector), + int(ref.repetition.a_count), + _coord_key(ref.repetition.b_vector), + int(ref.repetition.b_count), + ) + return ( + target, + _coord_key(ref.offset), + round(float(ref.rotation), 8), + round(float(ref.scale), 8), + bool(ref.mirrored), + repetition, + _annotations_key(ref.annotations), + ) + + +def _label_key(layer: tuple[int, int], label: object) -> tuple[object, ...]: + return ( + layer, + label.string, + _coord_key(label.offset), + _annotations_key(label.annotations), + ) + + +def _pattern_summary(pattern: Pattern) -> dict[str, object]: + shape_keys: list[tuple[object, ...]] = [] + for layer, shapes in pattern.shapes.items(): + for shape in shapes: + shape_keys.extend(_shape_key(shape, layer)) + + ref_keys = [ + _ref_key(target, ref) + for target, refs in pattern.refs.items() + for ref in refs + ] + + label_keys = [ + _label_key(layer, label) + for layer, labels in pattern.labels.items() + for label in labels + ] + + return { + 'shapes': sorted(shape_keys), + 'refs': sorted(ref_keys), + 'labels': sorted(label_keys), + } + + +def _library_summary(lib: Library) -> dict[str, dict[str, object]]: + return {name: _pattern_summary(pattern) for name, pattern in lib.items()} + + +def _make_arrow_test_library() -> Library: + lib = Library() + + leaf = Pattern() + leaf.polygon((1, 0), vertices=[[0, 0], [10, 0], [10, 10], [0, 10]], annotations={'1': ['leaf-poly']}) + leaf.polygon((1, 0), vertices=[[20, 0], [30, 0], [30, 10], [20, 10]]) + leaf.label((10, 0), string='LEAF', offset=(3, 4), annotations={'10': ['leaf-label']}) + lib['leaf'] = leaf + + child = Pattern() + child.path( + (2, 0), + vertices=[[0, 0], [15, 5], [30, 5]], + width=6, + cap=MPath.Cap.SquareCustom, + cap_extensions=(2, 4), + annotations={'2': ['child-path']}, + ) + child.label((11, 0), string='CHILD', offset=(7, 8), annotations={'11': ['child-label']}) + child.ref('leaf', offset=(100, 200), rotation=numpy.pi / 2, mirrored=True, scale=1.25, annotations={'12': ['child-ref']}) + lib['child'] = child + + sibling = Pattern() + sibling.polygon((3, 0), vertices=[[0, 0], [5, 0], [5, 6], [0, 6]]) + sibling.label((12, 0), string='SIB', offset=(1, 2), annotations={'13': ['sib-label']}) + sibling.ref( + 'leaf', + offset=(-50, 60), + repetition=Grid(a_vector=(20, 0), a_count=3, b_vector=(0, 30), b_count=2), + annotations={'14': ['sib-ref']}, + ) + lib['sibling'] = sibling + + top = Pattern() + top.ref('child', offset=(500, 600), annotations={'15': ['top-child-ref']}) + top.ref('sibling', offset=(-100, 50), rotation=numpy.pi, annotations={'16': ['top-sibling-ref']}) + top.label((13, 0), string='TOP', offset=(0, 0), annotations={'17': ['top-label']}) + lib['top'] = top + + return lib + + +def test_gdsii_arrow_matches_gdsii_readfile(tmp_path: Path) -> None: + lib = _make_arrow_test_library() + gds_file = tmp_path / 'arrow_roundtrip.gds' + gdsii.writefile(lib, gds_file, meters_per_unit=1e-9) + + canonical_lib, canonical_info = gdsii.readfile(gds_file) + arrow_lib, arrow_info = gdsii_arrow.readfile(gds_file) + + assert canonical_info == arrow_info + assert _library_summary(canonical_lib) == _library_summary(arrow_lib) + + +def test_gdsii_arrow_reads_small_perf_fixture(tmp_path: Path) -> None: + gds_file = tmp_path / 'many_cells_smoke.gds' + manifest = write_fixture(gds_file, preset='many_cells', scale=0.001) + + lib, info = gdsii_arrow.readfile(gds_file) + + assert info['name'] == manifest.library_name + assert len(lib) == manifest.cells + assert 'TOP' in lib + assert sum(len(refs) for refs in lib['TOP'].refs.values()) > 0