[gdsii_arrow] remove non-raw-mode arrow option; fix gzip wrapper

This commit is contained in:
Jan Petykiewicz 2026-04-20 21:34:10 -07:00
commit 9e8a8d6a22
3 changed files with 158 additions and 81 deletions

View file

@ -27,7 +27,6 @@ from typing import IO, cast, Any
from collections.abc import Iterable, Mapping, Callable
from importlib.machinery import EXTENSION_SUFFIXES
import importlib.util
import io
import mmap
import logging
import os
@ -35,11 +34,11 @@ import pathlib
import gzip
import string
import sys
import tempfile
from pprint import pformat
import numpy
from numpy.typing import ArrayLike, NDArray
from numpy.testing import assert_equal
import pyarrow
from pyarrow.cffi import ffi
@ -70,8 +69,6 @@ ffi.cdef(
clib: Any | None = None
ZERO_OFFSET = numpy.zeros(2)
path_cap_map = {
0: Path.Cap.Flush,
@ -195,12 +192,21 @@ def _read_annotations(
def _read_to_arrow(
        filename: str | pathlib.Path,
        *args,
        **kwargs,
        ) -> pyarrow.Array:
    """
    Read a GDSII file from disk into an Arrow array using the native reader.

    The native reader is handed a filesystem path, so gzipped input is first
    decompressed into a temporary `.gds` file, which is removed again once the
    native call has returned (or failed).

    Args:
        filename: Path of the file to read. `~` is expanded and the path is resolved.
        *args: Accepted but not used by this function.
        **kwargs: Accepted but not used by this function.

    Returns:
        The result of `_import_arrow_array()` applied to the array/schema
        structs filled in by the native reader.
    """
    import shutil       # function-scope import: only the gzip branch needs it

    path = pathlib.Path(filename).expanduser().resolve()
    ptr_array = ffi.new('struct ArrowArray[]', 1)
    ptr_schema = ffi.new('struct ArrowSchema[]', 1)

    if not is_gzipped(path):
        _get_clib().read_path(str(path).encode(), ptr_array, ptr_schema)
        return _import_arrow_array(ptr_array, ptr_schema)

    # delete=False so the file survives being closed and can be reopened by path.
    tmp_stream = tempfile.NamedTemporaryFile(suffix='.gds', delete=False)
    tmp_name = tmp_stream.name
    try:
        # Decompress in chunks instead of holding the whole payload in memory.
        # The write happens inside the `try` so a failed write (e.g. disk full)
        # still removes the temporary file.
        with tmp_stream, gzip.open(path, mode='rb') as src:
            shutil.copyfileobj(src, tmp_stream)
        _get_clib().read_path(tmp_name.encode(), ptr_array, ptr_schema)
    finally:
        pathlib.Path(tmp_name).unlink(missing_ok=True)
    return _import_arrow_array(ptr_array, ptr_schema)
@ -236,18 +242,14 @@ def _read_selected_cells_to_arrow(
def readfile(
filename: str | pathlib.Path,
*args,
**kwargs,
) -> tuple[Library, dict[str, Any]]:
"""
Wrapper for `read()` that takes a filename or path instead of a stream.
Read a GDSII file from a path into `masque.Library` / `Pattern` objects.
Will automatically decompress gzipped files.
Args:
filename: Filename to save to.
*args: passed to `read()`
**kwargs: passed to `read()`
filename: Filename to read.
For callers that can consume Arrow directly, prefer `readfile_arrow()`
to skip Python `Pattern` construction entirely.
@ -284,7 +286,6 @@ def readfile_arrow(
def read_arrow(
libarr: pyarrow.Array,
raw_mode: bool = True,
) -> tuple[Library, dict[str, Any]]:
"""
# TODO check GDSII file for cycles!
@ -299,8 +300,7 @@ def read_arrow(
per database unit
Args:
stream: Stream to read from.
raw_mode: If True, constructs shapes in raw mode, bypassing most data validation, Default True.
libarr: Arrow library payload as returned by `readfile_arrow()`.
Returns:
- dict of pattern_name:Patterns generated from GDSII structures
@ -436,7 +436,6 @@ def read_arrow(
global_args = dict(
cell_names = cell_names,
layer_tups = layer_tups,
raw_mode = raw_mode,
)
mlib = Library()
@ -488,7 +487,6 @@ def _srefs_to_mrefs(
elem_invert_y = elem['invert_y'][start:stop]
elem_angle_rad = elem['angle_rad'][start:stop]
elem_scale = elem['scale'][start:stop]
raw_mode = global_args['raw_mode']
_append_plain_refs_sorted(
pat=pat,
@ -498,7 +496,6 @@ def _srefs_to_mrefs(
elem_invert_y=elem_invert_y,
elem_angle_rad=elem_angle_rad,
elem_scale=elem_scale,
raw_mode=raw_mode,
)
@ -511,14 +508,11 @@ def _append_plain_refs_sorted(
elem_invert_y: NDArray[numpy.bool_ | numpy.bool],
elem_angle_rad: NDArray[numpy.floating[Any]],
elem_scale: NDArray[numpy.floating[Any]],
raw_mode: bool,
) -> None:
elem_count = len(elem_targets)
if elem_count == 0:
return
make_ref = Ref._from_raw if raw_mode else Ref
target_start = 0
while target_start < elem_count:
target_id = int(elem_targets[target_start])
@ -528,7 +522,7 @@ def _append_plain_refs_sorted(
append_refs = pat.refs[cell_names[target_id]].extend
append_refs(
make_ref(
Ref._from_raw(
offset=elem_xy[ee],
mirrored=elem_invert_y[ee],
rotation=elem_angle_rad[ee],
@ -564,10 +558,6 @@ def _arefs_to_mrefs(
elem_xy0 = elem['xy0'][start:stop]
elem_xy1 = elem['xy1'][start:stop]
elem_counts = elem['counts'][start:stop]
raw_mode = global_args['raw_mode']
make_ref = Ref._from_raw if raw_mode else Ref
make_grid = Grid._from_raw if raw_mode else Grid
if len(elem_targets) == 0:
return
@ -581,12 +571,12 @@ def _arefs_to_mrefs(
append_ref = pat.refs[cell_names[target_id]].append
assert append_ref is not None
a_count, b_count = elem_counts[ee]
append_ref(make_ref(
append_ref(Ref._from_raw(
offset=elem_xy[ee],
mirrored=elem_invert_y[ee],
rotation=elem_angle_rad[ee],
scale=elem_scale[ee],
repetition=make_grid(a_vector=elem_xy0[ee], b_vector=elem_xy1[ee], a_count=a_count, b_count=b_count),
repetition=Grid._from_raw(a_vector=elem_xy0[ee], b_vector=elem_xy1[ee], a_count=a_count, b_count=b_count),
annotations=None,
))
@ -613,13 +603,10 @@ def _sref_props_to_mrefs(
elem_invert_y = elem['invert_y'][elem_off[cc]:elem_off[cc + 1]]
elem_angle_rad = elem['angle_rad'][elem_off[cc]:elem_off[cc + 1]]
elem_scale = elem['scale'][elem_off[cc]:elem_off[cc + 1]]
raw_mode = global_args['raw_mode']
make_ref = Ref._from_raw if raw_mode else Ref
for ee in range(elem_count):
annotations = _read_annotations(prop_offs, prop_key, prop_val, ee)
ref = make_ref(
ref = Ref._from_raw(
offset=elem_xy[ee],
mirrored=elem_invert_y[ee],
rotation=elem_angle_rad[ee],
@ -655,20 +642,16 @@ def _aref_props_to_mrefs(
elem_xy0 = elem['xy0'][elem_off[cc]:elem_off[cc + 1]]
elem_xy1 = elem['xy1'][elem_off[cc]:elem_off[cc + 1]]
elem_counts = elem['counts'][elem_off[cc]:elem_off[cc + 1]]
raw_mode = global_args['raw_mode']
make_ref = Ref._from_raw if raw_mode else Ref
make_grid = Grid._from_raw if raw_mode else Grid
for ee in range(elem_count):
a_count, b_count = elem_counts[ee]
annotations = _read_annotations(prop_offs, prop_key, prop_val, ee)
ref = make_ref(
ref = Ref._from_raw(
offset=elem_xy[ee],
mirrored=elem_invert_y[ee],
rotation=elem_angle_rad[ee],
scale=elem_scale[ee],
repetition=make_grid(a_vector=elem_xy0[ee], b_vector=elem_xy1[ee], a_count=a_count, b_count=b_count),
repetition=Grid._from_raw(a_vector=elem_xy0[ee], b_vector=elem_xy1[ee], a_count=a_count, b_count=b_count),
annotations=annotations,
)
pat.refs[cell_names[int(elem_targets[ee])]].append(ref)
@ -693,7 +676,6 @@ def _texts_to_labels(
elem_xy = xy[elem_slc][:elem_count]
elem_layer_inds = layer_inds[elem_slc][:elem_count]
elem_strings = elem['string'][elem_slc][:elem_count]
raw_mode = global_args['raw_mode']
for ee in range(elem_count):
layer = layer_tups[int(elem_layer_inds[ee])]
@ -701,10 +683,7 @@ def _texts_to_labels(
string = elem_strings[ee]
annotations = _read_annotations(prop_offs, prop_key, prop_val, ee)
if raw_mode:
mlabel = Label._from_raw(string=string, offset=offset, annotations=annotations)
else:
mlabel = Label(string=string, offset=offset, annotations=annotations)
pat.labels[layer].append(mlabel)
@ -730,7 +709,6 @@ def _gpaths_to_mpaths(
elem_path_types = elem['path_type'][elem_slc][:elem_count]
elem_extensions = elem['extensions'][elem_slc][:elem_count]
raw_mode = global_args['raw_mode']
for ee in range(elem_count):
layer = layer_tups[int(elem_layer_inds[ee])]
vertices = xy_val[xy_offs[ee]:xy_offs[ee + 1]]
@ -743,7 +721,6 @@ def _gpaths_to_mpaths(
cap_extensions = None
annotations = _read_annotations(prop_offs, prop_key, prop_val, ee)
if raw_mode:
path = Path._from_raw(
vertices=vertices,
width=width,
@ -751,15 +728,6 @@ def _gpaths_to_mpaths(
cap_extensions=cap_extensions,
annotations=annotations,
)
else:
path = Path(
vertices=vertices,
width=width,
cap=cap,
cap_extensions=cap_extensions,
offset=ZERO_OFFSET,
annotations=annotations,
)
pat.shapes[layer].append(path)
@ -786,23 +754,16 @@ def _boundary_batches_to_polygons(
elem_poly_off = poly_off[elem_slc]
elem_layer_inds = layer_inds[elem_slc][:batch_count]
raw_mode = global_args['raw_mode']
for bb in range(batch_count):
layer = layer_tups[int(elem_layer_inds[bb])]
vertices = vert_arr[elem_vert_off[bb]:elem_vert_off[bb + 1]]
vertex_offsets = poly_offsets[elem_poly_off[bb]:elem_poly_off[bb + 1]]
if vertex_offsets.size == 1:
if raw_mode:
poly = Polygon._from_raw(vertices=vertices, annotations=None)
else:
poly = Polygon(vertices=vertices, offset=ZERO_OFFSET, annotations=None)
pat.shapes[layer].append(poly)
else:
if raw_mode:
polys = PolyCollection._from_raw(vertex_lists=vertices, vertex_offsets=vertex_offsets, annotations=None)
else:
polys = PolyCollection(vertex_lists=vertices, vertex_offsets=vertex_offsets, offset=ZERO_OFFSET, annotations=None)
pat.shapes[layer].append(polys)
@ -826,14 +787,10 @@ def _rect_batches_to_rectcollections(
elem_rect_off = rect_off[elem_slc]
elem_layer_inds = layer_inds[elem_slc][:batch_count]
raw_mode = global_args['raw_mode']
for bb in range(batch_count):
layer = layer_tups[int(elem_layer_inds[bb])]
rects = rect_arr[elem_rect_off[bb]:elem_rect_off[bb + 1]]
if raw_mode:
rect_collection = RectCollection._from_raw(rects=rects, annotations=None)
else:
rect_collection = RectCollection(rects=rects, offset=ZERO_OFFSET, annotations=None)
pat.shapes[layer].append(rect_collection)
@ -860,15 +817,11 @@ def _boundary_props_to_polygons(
prop_offs = elem['prop_off'][elem_slc]
elem_layer_inds = layer_inds[elem_slc][:elem_count]
raw_mode = global_args['raw_mode']
for ee in range(elem_count):
layer = layer_tups[int(elem_layer_inds[ee])]
vertices = vert_arr[elem_vert_off[ee]:elem_vert_off[ee + 1]]
annotations = _read_annotations(prop_offs, prop_key, prop_val, ee)
if raw_mode:
poly = Polygon._from_raw(vertices=vertices, annotations=annotations)
else:
poly = Polygon(vertices=vertices, offset=ZERO_OFFSET, annotations=annotations)
pat.shapes[layer].append(poly)

View file

@ -187,6 +187,18 @@ def test_gdsii_arrow_matches_gdsii_readfile(tmp_path: Path) -> None:
assert _library_summary(canonical_lib) == _library_summary(arrow_lib)
def test_gdsii_arrow_matches_gdsii_readfile_for_gzipped_file(tmp_path: Path) -> None:
    """
    Reading a `.gds.gz` file through `gdsii_arrow.readfile()` must give the same
    info dict and library summary as reading it through `gdsii.readfile()`.
    """
    gz_path = tmp_path / 'arrow_roundtrip.gds.gz'
    gdsii.writefile(_make_arrow_test_library(), gz_path, meters_per_unit=1e-9)

    expected_lib, expected_info = gdsii.readfile(gz_path)
    actual_lib, actual_info = gdsii_arrow.readfile(gz_path)

    assert expected_info == actual_info
    assert _library_summary(expected_lib) == _library_summary(actual_lib)
def test_gdsii_arrow_readfile_arrow_returns_native_payload(tmp_path: Path) -> None:
gds_file = tmp_path / 'many_cells_native.gds'
manifest = write_fixture(gds_file, preset='many_cells', scale=0.001)
@ -199,6 +211,33 @@ def test_gdsii_arrow_readfile_arrow_returns_native_payload(tmp_path: Path) -> No
assert 0 < len(libarr['layers']) <= manifest.layers
def test_gdsii_arrow_readfile_arrow_reads_gzipped_file(tmp_path: Path) -> None:
    """
    `readfile_arrow()` on a `.gds.gz` file must return a payload and info dict
    carrying the expected library name, one cell entry per source pattern,
    and at least one layer.
    """
    source_lib = _make_arrow_test_library()
    gz_path = tmp_path / 'native_payload.gds.gz'
    gdsii.writefile(source_lib, gz_path, meters_per_unit=1e-9)

    payload, header = gdsii_arrow.readfile_arrow(gz_path)

    assert header['name'] == 'masque-klamath'
    assert payload['lib_name'].as_py() == 'masque-klamath'

    cell_count = len(payload['cells'])
    assert cell_count == len(source_lib)

    layer_count = len(payload['layers'])
    assert layer_count > 0
def test_gdsii_arrow_removed_raw_mode_arg(tmp_path: Path) -> None:
    """
    Passing `raw_mode` to `gdsii_arrow.readfile()` or `gdsii_arrow.read_arrow()`
    must be rejected with a `TypeError`.
    """
    gds_path = tmp_path / 'removed_raw_mode.gds'
    gdsii.writefile(_make_arrow_test_library(), gds_path, meters_per_unit=1e-9)

    # Only the Arrow payload is needed below; the info dict is discarded.
    payload, _info = gdsii_arrow.readfile_arrow(gds_path)

    with pytest.raises(TypeError):
        gdsii_arrow.readfile(gds_path, raw_mode=False)

    with pytest.raises(TypeError):
        gdsii_arrow.read_arrow(payload, raw_mode=False)
def test_gdsii_arrow_reads_small_perf_fixture(tmp_path: Path) -> None:
gds_file = tmp_path / 'many_cells_smoke.gds'
manifest = write_fixture(gds_file, preset='many_cells', scale=0.001)

View file

@ -34,6 +34,78 @@ def _make_small_library() -> Library:
return lib
def _make_complex_ref_library() -> Library:
    """
    Build a five-cell library ('leaf', 'child', 'sibling', 'fanout', 'top').

    'leaf' holds a single square polygon; the other cells only hold references,
    mixing plain, mirrored/rotated/scaled, and `Grid`-repeated placements.

    Returns:
        The populated `Library`.
    """
    lib = Library()

    # Geometry-only cell: one 10x10 square on layer (1, 0).
    leaf_pat = Pattern()
    leaf_pat.polygon((1, 0), vertices=[[0, 0], [10, 0], [10, 10], [0, 10]])
    lib['leaf'] = leaf_pat

    # Single transformed (mirrored + rotated + scaled) reference to 'leaf'.
    child_pat = Pattern()
    child_pat.ref('leaf', offset=(100, 200), rotation=numpy.pi / 2, mirrored=True, scale=1.25)
    lib['child'] = child_pat

    # Single arrayed reference to 'leaf'.
    sibling_grid = Grid(a_vector=(20, 0), a_count=3, b_vector=(0, 30), b_count=2)
    sibling_pat = Pattern()
    sibling_pat.ref('leaf', offset=(-50, 60), repetition=sibling_grid)
    lib['sibling'] = sibling_pat

    # Four references: plain and arrayed, to both 'leaf' and 'child'.
    fanout_pat = Pattern()
    fanout_pat.ref('leaf', offset=(0, 0))
    fanout_pat.ref('child', offset=(10, 0), mirrored=True, rotation=numpy.pi / 6, scale=1.1)
    leaf_grid = Grid(a_vector=(5, 0), a_count=2, b_vector=(0, 7), b_count=3)
    fanout_pat.ref('leaf', offset=(30, 0), repetition=leaf_grid)
    child_grid = Grid(a_vector=(9, 0), a_count=2, b_vector=(0, 11), b_count=2)
    fanout_pat.ref(
        'child',
        offset=(40, 0),
        mirrored=True,
        rotation=numpy.pi / 4,
        scale=1.2,
        repetition=child_grid,
    )
    lib['fanout'] = fanout_pat

    # Top cell referencing each of the intermediate cells once.
    top_pat = Pattern()
    top_pat.ref('child', offset=(500, 600))
    top_pat.ref('sibling', offset=(-100, 50), rotation=numpy.pi)
    top_pat.ref('fanout', offset=(250, -75))
    lib['top'] = top_pat

    return lib
def _transform_rows_key(values: numpy.ndarray) -> tuple[tuple[object, ...], ...]:
arr = numpy.asarray(values, dtype=float)
arr = numpy.atleast_2d(arr)
rows = [
(
round(float(row[0]), 8),
round(float(row[1]), 8),
round(float(row[2]), 8),
bool(int(round(float(row[3])))),
round(float(row[4]), 8),
)
for row in arr
]
return tuple(sorted(rows))
def _local_refs_key(refs: dict[str, list[numpy.ndarray]]) -> dict[str, tuple[tuple[object, ...], ...]]:
    """
    Convert a mapping of parent name -> list of transform arrays into a mapping
    of parent name -> comparable key.

    Each parent's arrays are concatenated and passed to `_transform_rows_key()`.
    """
    keyed: dict[str, tuple[tuple[object, ...], ...]] = {}
    for parent, transforms in refs.items():
        stacked = numpy.concatenate(transforms)
        keyed[parent] = _transform_rows_key(stacked)
    return keyed
def _global_refs_key(refs: dict[tuple[str, ...], numpy.ndarray]) -> dict[tuple[str, ...], tuple[tuple[object, ...], ...]]:
    """
    Convert a mapping of hierarchy path -> transform array into a mapping of
    hierarchy path -> comparable key, using `_transform_rows_key()` per entry.
    """
    keyed: dict[tuple[str, ...], tuple[tuple[object, ...], ...]] = {}
    for path, transforms in refs.items():
        keyed[path] = _transform_rows_key(transforms)
    return keyed
def test_gdsii_lazy_arrow_loads_perf_fixture(tmp_path: Path) -> None:
gds_file = tmp_path / 'many_cells_lazy.gds'
manifest = write_fixture(gds_file, preset='many_cells', scale=0.001)
@ -62,6 +134,19 @@ def test_gdsii_lazy_arrow_local_and_global_refs(tmp_path: Path) -> None:
assert global_refs[('top', 'mid', 'leaf')].shape[0] == 5
def test_gdsii_lazy_arrow_ref_queries_match_eager_reader(tmp_path: Path) -> None:
    """
    `find_refs_local()` and `find_refs_global()` on the lazy Arrow-backed library
    must agree (after normalization) with the eagerly-read library for the
    'leaf' and 'child' cells.
    """
    gds_path = tmp_path / 'complex_refs.gds'
    gdsii.writefile(
        _make_complex_ref_library(),
        gds_path,
        meters_per_unit=1e-9,
        library_name='lazy-complex-refs',
    )

    eager_lib, _ = gdsii.readfile(gds_path)
    lazy_lib, _ = gdsii_lazy_arrow.readfile(gds_path)

    for target in ('leaf', 'child'):
        lazy_local = _local_refs_key(lazy_lib.find_refs_local(target))
        eager_local = _local_refs_key(eager_lib.find_refs_local(target))
        assert lazy_local == eager_local

        lazy_global = _global_refs_key(lazy_lib.find_refs_global(target))
        eager_global = _global_refs_key(eager_lib.find_refs_global(target))
        assert lazy_global == eager_global
def test_gdsii_lazy_arrow_untouched_write_is_copy_through(tmp_path: Path) -> None:
gds_file = tmp_path / 'copy_source.gds'
src = _make_small_library()