From 9e8a8d6a2216efcdf5b05c1fc5d8482a9297877b Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Mon, 20 Apr 2026 21:34:10 -0700 Subject: [PATCH] [gdsii_arrow] remove non-raw-mode arrow option; fix gzip wrapper --- masque/file/gdsii_arrow.py | 115 ++++++++------------------- masque/test/test_gdsii_arrow.py | 39 +++++++++ masque/test/test_gdsii_lazy_arrow.py | 85 ++++++++++++++++++++ 3 files changed, 158 insertions(+), 81 deletions(-) diff --git a/masque/file/gdsii_arrow.py b/masque/file/gdsii_arrow.py index b97005b..a38dee5 100644 --- a/masque/file/gdsii_arrow.py +++ b/masque/file/gdsii_arrow.py @@ -27,7 +27,6 @@ from typing import IO, cast, Any from collections.abc import Iterable, Mapping, Callable from importlib.machinery import EXTENSION_SUFFIXES import importlib.util -import io import mmap import logging import os @@ -35,11 +34,11 @@ import pathlib import gzip import string import sys +import tempfile from pprint import pformat import numpy from numpy.typing import ArrayLike, NDArray -from numpy.testing import assert_equal import pyarrow from pyarrow.cffi import ffi @@ -70,8 +69,6 @@ ffi.cdef( clib: Any | None = None -ZERO_OFFSET = numpy.zeros(2) - path_cap_map = { 0: Path.Cap.Flush, @@ -195,13 +192,22 @@ def _read_annotations( def _read_to_arrow( filename: str | pathlib.Path, - *args, - **kwargs, ) -> pyarrow.Array: path = pathlib.Path(filename).expanduser().resolve() ptr_array = ffi.new('struct ArrowArray[]', 1) ptr_schema = ffi.new('struct ArrowSchema[]', 1) - _get_clib().read_path(str(path).encode(), ptr_array, ptr_schema) + if is_gzipped(path): + with gzip.open(path, mode='rb') as src: + data = src.read() + with tempfile.NamedTemporaryFile(suffix='.gds', delete=False) as tmp_stream: + tmp_stream.write(data) + tmp_name = tmp_stream.name + try: + _get_clib().read_path(tmp_name.encode(), ptr_array, ptr_schema) + finally: + pathlib.Path(tmp_name).unlink(missing_ok=True) + else: + _get_clib().read_path(str(path).encode(), ptr_array, ptr_schema) return _import_arrow_array(ptr_array, ptr_schema) @@ -236,18 +242,14 @@ def _read_selected_cells_to_arrow( def readfile( filename: str | pathlib.Path, - *args, - **kwargs, ) -> tuple[Library, dict[str, Any]]: """ - Wrapper for `read()` that takes a filename or path instead of a stream. + Read a GDSII file from a path into `masque.Library` / `Pattern` objects. Will automatically decompress gzipped files. Args: - filename: Filename to save to. - *args: passed to `read()` - **kwargs: passed to `read()` + filename: Filename to read. For callers that can consume Arrow directly, prefer `readfile_arrow()` to skip Python `Pattern` construction entirely. @@ -284,7 +286,6 @@ def readfile_arrow( def read_arrow( libarr: pyarrow.Array, - raw_mode: bool = True, ) -> tuple[Library, dict[str, Any]]: """ # TODO check GDSII file for cycles! @@ -299,8 +300,7 @@ def read_arrow( per database unit Args: - stream: Stream to read from. - raw_mode: If True, constructs shapes in raw mode, bypassing most data validation, Default True. + libarr: Arrow library payload as returned by `readfile_arrow()`. Returns: - dict of pattern_name:Patterns generated from GDSII structures @@ -436,7 +436,6 @@ def read_arrow( global_args = dict( cell_names = cell_names, layer_tups = layer_tups, - raw_mode = raw_mode, ) mlib = Library() @@ -488,7 +487,6 @@ def _srefs_to_mrefs( elem_invert_y = elem['invert_y'][start:stop] elem_angle_rad = elem['angle_rad'][start:stop] elem_scale = elem['scale'][start:stop] - raw_mode = global_args['raw_mode'] _append_plain_refs_sorted( pat=pat, @@ -498,7 +496,6 @@ def _srefs_to_mrefs( elem_invert_y=elem_invert_y, elem_angle_rad=elem_angle_rad, elem_scale=elem_scale, - raw_mode=raw_mode, ) @@ -511,14 +508,11 @@ def _append_plain_refs_sorted( elem_invert_y: NDArray[numpy.bool_ | numpy.bool], elem_angle_rad: NDArray[numpy.floating[Any]], elem_scale: NDArray[numpy.floating[Any]], - raw_mode: bool, ) -> None: elem_count = len(elem_targets) if elem_count == 0: return - make_ref = Ref._from_raw if raw_mode else Ref - target_start = 0 while target_start < elem_count: target_id = int(elem_targets[target_start]) @@ -528,7 +522,7 @@ def _append_plain_refs_sorted( append_refs = pat.refs[cell_names[target_id]].extend append_refs( - make_ref( + Ref._from_raw( offset=elem_xy[ee], mirrored=elem_invert_y[ee], rotation=elem_angle_rad[ee], @@ -564,10 +558,6 @@ def _arefs_to_mrefs( elem_xy0 = elem['xy0'][start:stop] elem_xy1 = elem['xy1'][start:stop] elem_counts = elem['counts'][start:stop] - raw_mode = global_args['raw_mode'] - - make_ref = Ref._from_raw if raw_mode else Ref - make_grid = Grid._from_raw if raw_mode else Grid if len(elem_targets) == 0: return @@ -581,12 +571,12 @@ def _arefs_to_mrefs( append_ref = pat.refs[cell_names[target_id]].append assert append_ref is not None a_count, b_count = elem_counts[ee] - append_ref(make_ref( + append_ref(Ref._from_raw( offset=elem_xy[ee], mirrored=elem_invert_y[ee], rotation=elem_angle_rad[ee], scale=elem_scale[ee], - repetition=make_grid(a_vector=elem_xy0[ee], b_vector=elem_xy1[ee], a_count=a_count, b_count=b_count), + repetition=Grid._from_raw(a_vector=elem_xy0[ee], b_vector=elem_xy1[ee], a_count=a_count, b_count=b_count), annotations=None, )) @@ -613,13 +603,10 @@ def _sref_props_to_mrefs( elem_invert_y = elem['invert_y'][elem_off[cc]:elem_off[cc + 1]] elem_angle_rad = elem['angle_rad'][elem_off[cc]:elem_off[cc + 1]] elem_scale = elem['scale'][elem_off[cc]:elem_off[cc + 1]] - raw_mode = global_args['raw_mode'] - - make_ref = Ref._from_raw if raw_mode else Ref for ee in range(elem_count): annotations = _read_annotations(prop_offs, prop_key, prop_val, ee) - ref = make_ref( + ref = Ref._from_raw( offset=elem_xy[ee], mirrored=elem_invert_y[ee], rotation=elem_angle_rad[ee], @@ -655,20 +642,16 @@ def _aref_props_to_mrefs( elem_xy0 = elem['xy0'][elem_off[cc]:elem_off[cc + 1]] elem_xy1 = elem['xy1'][elem_off[cc]:elem_off[cc + 1]] elem_counts = elem['counts'][elem_off[cc]:elem_off[cc + 1]] - raw_mode = global_args['raw_mode'] - - make_ref = Ref._from_raw if raw_mode else Ref - make_grid = Grid._from_raw if raw_mode else Grid for ee in range(elem_count): a_count, b_count = elem_counts[ee] annotations = _read_annotations(prop_offs, prop_key, prop_val, ee) - ref = make_ref( + ref = Ref._from_raw( offset=elem_xy[ee], mirrored=elem_invert_y[ee], rotation=elem_angle_rad[ee], scale=elem_scale[ee], - repetition=make_grid(a_vector=elem_xy0[ee], b_vector=elem_xy1[ee], a_count=a_count, b_count=b_count), + repetition=Grid._from_raw(a_vector=elem_xy0[ee], b_vector=elem_xy1[ee], a_count=a_count, b_count=b_count), annotations=annotations, ) pat.refs[cell_names[int(elem_targets[ee])]].append(ref) @@ -693,7 +676,6 @@ def _texts_to_labels( elem_xy = xy[elem_slc][:elem_count] elem_layer_inds = layer_inds[elem_slc][:elem_count] elem_strings = elem['string'][elem_slc][:elem_count] - raw_mode = global_args['raw_mode'] for ee in range(elem_count): layer = layer_tups[int(elem_layer_inds[ee])] @@ -701,10 +683,7 @@ def _texts_to_labels( string = elem_strings[ee] annotations = _read_annotations(prop_offs, prop_key, prop_val, ee) - if raw_mode: - mlabel = Label._from_raw(string=string, offset=offset, annotations=annotations) - else: - mlabel = Label(string=string, offset=offset, annotations=annotations) + mlabel = Label._from_raw(string=string, offset=offset, annotations=annotations) pat.labels[layer].append(mlabel) @@ -730,7 +709,6 @@ def _gpaths_to_mpaths( elem_path_types = elem['path_type'][elem_slc][:elem_count] elem_extensions = elem['extensions'][elem_slc][:elem_count] - raw_mode = global_args['raw_mode'] for ee in range(elem_count): layer = layer_tups[int(elem_layer_inds[ee])] vertices = xy_val[xy_offs[ee]:xy_offs[ee + 1]] @@ -743,23 +721,13 @@ def _gpaths_to_mpaths( cap_extensions = None annotations = _read_annotations(prop_offs, prop_key, prop_val, ee) - if raw_mode: - path = Path._from_raw( - vertices=vertices, - width=width, - cap=cap, - cap_extensions=cap_extensions, - annotations=annotations, - ) - else: - path = Path( - vertices=vertices, - width=width, - cap=cap, - cap_extensions=cap_extensions, - offset=ZERO_OFFSET, - annotations=annotations, - ) + path = Path._from_raw( + vertices=vertices, + width=width, + cap=cap, + cap_extensions=cap_extensions, + annotations=annotations, + ) pat.shapes[layer].append(path) @@ -786,23 +754,16 @@ def _boundary_batches_to_polygons( elem_poly_off = poly_off[elem_slc] elem_layer_inds = layer_inds[elem_slc][:batch_count] - raw_mode = global_args['raw_mode'] for bb in range(batch_count): layer = layer_tups[int(elem_layer_inds[bb])] vertices = vert_arr[elem_vert_off[bb]:elem_vert_off[bb + 1]] vertex_offsets = poly_offsets[elem_poly_off[bb]:elem_poly_off[bb + 1]] if vertex_offsets.size == 1: - if raw_mode: - poly = Polygon._from_raw(vertices=vertices, annotations=None) - else: - poly = Polygon(vertices=vertices, offset=ZERO_OFFSET, annotations=None) + poly = Polygon._from_raw(vertices=vertices, annotations=None) pat.shapes[layer].append(poly) else: - if raw_mode: - polys = PolyCollection._from_raw(vertex_lists=vertices, vertex_offsets=vertex_offsets, annotations=None) - else: - polys = PolyCollection(vertex_lists=vertices, vertex_offsets=vertex_offsets, offset=ZERO_OFFSET, annotations=None) + polys = PolyCollection._from_raw(vertex_lists=vertices, vertex_offsets=vertex_offsets, annotations=None) pat.shapes[layer].append(polys) @@ -826,14 +787,10 @@ def _rect_batches_to_rectcollections( elem_rect_off = rect_off[elem_slc] elem_layer_inds = layer_inds[elem_slc][:batch_count] - raw_mode = global_args['raw_mode'] for bb in range(batch_count): layer = layer_tups[int(elem_layer_inds[bb])] rects = rect_arr[elem_rect_off[bb]:elem_rect_off[bb + 1]] - if raw_mode: - rect_collection = RectCollection._from_raw(rects=rects, annotations=None) - else: - rect_collection = RectCollection(rects=rects, offset=ZERO_OFFSET, annotations=None) + rect_collection = RectCollection._from_raw(rects=rects, annotations=None) pat.shapes[layer].append(rect_collection) @@ -860,15 +817,11 @@ def _boundary_props_to_polygons( prop_offs = elem['prop_off'][elem_slc] elem_layer_inds = layer_inds[elem_slc][:elem_count] - raw_mode = global_args['raw_mode'] for ee in range(elem_count): layer = layer_tups[int(elem_layer_inds[ee])] vertices = vert_arr[elem_vert_off[ee]:elem_vert_off[ee + 1]] annotations = _read_annotations(prop_offs, prop_key, prop_val, ee) - if raw_mode: - poly = Polygon._from_raw(vertices=vertices, annotations=annotations) - else: - poly = Polygon(vertices=vertices, offset=ZERO_OFFSET, annotations=annotations) + poly = Polygon._from_raw(vertices=vertices, annotations=annotations) pat.shapes[layer].append(poly) diff --git a/masque/test/test_gdsii_arrow.py b/masque/test/test_gdsii_arrow.py index f3f4f6a..660818c 100644 --- a/masque/test/test_gdsii_arrow.py +++ b/masque/test/test_gdsii_arrow.py @@ -187,6 +187,18 @@ def test_gdsii_arrow_matches_gdsii_readfile(tmp_path: Path) -> None: assert _library_summary(canonical_lib) == _library_summary(arrow_lib) +def test_gdsii_arrow_matches_gdsii_readfile_for_gzipped_file(tmp_path: Path) -> None: + lib = _make_arrow_test_library() + gds_file = tmp_path / 'arrow_roundtrip.gds.gz' + gdsii.writefile(lib, gds_file, meters_per_unit=1e-9) + + canonical_lib, canonical_info = gdsii.readfile(gds_file) + arrow_lib, arrow_info = gdsii_arrow.readfile(gds_file) + + assert canonical_info == arrow_info + assert _library_summary(canonical_lib) == _library_summary(arrow_lib) + + def test_gdsii_arrow_readfile_arrow_returns_native_payload(tmp_path: Path) -> None: gds_file = tmp_path / 'many_cells_native.gds' manifest = write_fixture(gds_file, preset='many_cells', scale=0.001) @@ -199,6 +211,33 @@ def test_gdsii_arrow_readfile_arrow_returns_native_payload(tmp_path: Path) -> No assert 0 < len(libarr['layers']) <= manifest.layers +def test_gdsii_arrow_readfile_arrow_reads_gzipped_file(tmp_path: Path) -> None: + lib = _make_arrow_test_library() + gds_file = tmp_path / 'native_payload.gds.gz' + gdsii.writefile(lib, gds_file, meters_per_unit=1e-9) + + libarr, info = gdsii_arrow.readfile_arrow(gds_file) + + assert info['name'] == 'masque-klamath' + assert libarr['lib_name'].as_py() == 'masque-klamath' + assert len(libarr['cells']) == len(lib) + assert len(libarr['layers']) > 0 + + +def test_gdsii_arrow_removed_raw_mode_arg(tmp_path: Path) -> None: + lib = _make_arrow_test_library() + gds_file = tmp_path / 'removed_raw_mode.gds' + gdsii.writefile(lib, gds_file, meters_per_unit=1e-9) + + libarr, _ = gdsii_arrow.readfile_arrow(gds_file) + + with pytest.raises(TypeError): + gdsii_arrow.readfile(gds_file, raw_mode=False) + + with pytest.raises(TypeError): + gdsii_arrow.read_arrow(libarr, raw_mode=False) + + def test_gdsii_arrow_reads_small_perf_fixture(tmp_path: Path) -> None: gds_file = tmp_path / 'many_cells_smoke.gds' manifest = write_fixture(gds_file, preset='many_cells', scale=0.001) diff --git a/masque/test/test_gdsii_lazy_arrow.py b/masque/test/test_gdsii_lazy_arrow.py index 61a99af..b30b058 100644 --- a/masque/test/test_gdsii_lazy_arrow.py +++ b/masque/test/test_gdsii_lazy_arrow.py @@ -34,6 +34,78 @@ def _make_small_library() -> Library: return lib +def _make_complex_ref_library() -> Library: + lib = Library() + + leaf = Pattern() + leaf.polygon((1, 0), vertices=[[0, 0], [10, 0], [10, 10], [0, 10]]) + lib['leaf'] = leaf + + child = Pattern() + child.ref('leaf', offset=(100, 200), rotation=numpy.pi / 2, mirrored=True, scale=1.25) + lib['child'] = child + + sibling = Pattern() + sibling.ref( + 'leaf', + offset=(-50, 60), + repetition=Grid(a_vector=(20, 0), a_count=3, b_vector=(0, 30), b_count=2), + ) + lib['sibling'] = sibling + + fanout = Pattern() + fanout.ref('leaf', offset=(0, 0)) + fanout.ref('child', offset=(10, 0), mirrored=True, rotation=numpy.pi / 6, scale=1.1) + fanout.ref('leaf', offset=(30, 0), repetition=Grid(a_vector=(5, 0), a_count=2, b_vector=(0, 7), b_count=3)) + fanout.ref( + 'child', + offset=(40, 0), + mirrored=True, + rotation=numpy.pi / 4, + scale=1.2, + repetition=Grid(a_vector=(9, 0), a_count=2, b_vector=(0, 11), b_count=2), + ) + lib['fanout'] = fanout + + top = Pattern() + top.ref('child', offset=(500, 600)) + top.ref('sibling', offset=(-100, 50), rotation=numpy.pi) + top.ref('fanout', offset=(250, -75)) + lib['top'] = top + + return lib + + +def _transform_rows_key(values: numpy.ndarray) -> tuple[tuple[object, ...], ...]: + arr = numpy.asarray(values, dtype=float) + arr = numpy.atleast_2d(arr) + rows = [ + ( + round(float(row[0]), 8), + round(float(row[1]), 8), + round(float(row[2]), 8), + bool(int(round(float(row[3])))), + round(float(row[4]), 8), + ) + for row in arr + ] + return tuple(sorted(rows)) + + +def _local_refs_key(refs: dict[str, list[numpy.ndarray]]) -> dict[str, tuple[tuple[object, ...], ...]]: + return { + parent: _transform_rows_key(numpy.concatenate(transforms)) + for parent, transforms in refs.items() + } + + +def _global_refs_key(refs: dict[tuple[str, ...], numpy.ndarray]) -> dict[tuple[str, ...], tuple[tuple[object, ...], ...]]: + return { + path: _transform_rows_key(transforms) + for path, transforms in refs.items() + } + + def test_gdsii_lazy_arrow_loads_perf_fixture(tmp_path: Path) -> None: gds_file = tmp_path / 'many_cells_lazy.gds' manifest = write_fixture(gds_file, preset='many_cells', scale=0.001) @@ -62,6 +134,19 @@ def test_gdsii_lazy_arrow_local_and_global_refs(tmp_path: Path) -> None: assert global_refs[('top', 'mid', 'leaf')].shape[0] == 5 +def test_gdsii_lazy_arrow_ref_queries_match_eager_reader(tmp_path: Path) -> None: + gds_file = tmp_path / 'complex_refs.gds' + src = _make_complex_ref_library() + gdsii.writefile(src, gds_file, meters_per_unit=1e-9, library_name='lazy-complex-refs') + + eager, _ = gdsii.readfile(gds_file) + lazy, _ = gdsii_lazy_arrow.readfile(gds_file) + + for name in ('leaf', 'child'): + assert _local_refs_key(lazy.find_refs_local(name)) == _local_refs_key(eager.find_refs_local(name)) + assert _global_refs_key(lazy.find_refs_global(name)) == _global_refs_key(eager.find_refs_global(name)) + + def test_gdsii_lazy_arrow_untouched_write_is_copy_through(tmp_path: Path) -> None: gds_file = tmp_path / 'copy_source.gds' src = _make_small_library()