[gdsii_arrow] misc correctness work

This commit is contained in:
Jan Petykiewicz 2026-04-02 13:21:59 -07:00
commit 09fec67a21
2 changed files with 316 additions and 37 deletions

View file

@ -25,12 +25,16 @@ Notes:
"""
from typing import IO, cast, Any
from collections.abc import Iterable, Mapping, Callable
from importlib.machinery import EXTENSION_SUFFIXES
import importlib.util
import io
import mmap
import logging
import os
import pathlib
import gzip
import string
import sys
from pprint import pformat
import numpy
@ -49,9 +53,10 @@ from ..library import LazyLibrary, Library, ILibrary, ILibraryView
logger = logging.getLogger(__name__)
clib = ffi.dlopen('/home/jan/projects/klamath-rs/target/release/libklamath_rs_ext.so')
ffi.cdef('void read_path(char* path, struct ArrowArray* array, struct ArrowSchema* schema);')
clib: Any | None = None
path_cap_map = {
0: Path.Cap.Flush,
@ -65,16 +70,123 @@ def rint_cast(val: ArrayLike) -> NDArray[numpy.int32]:
return numpy.rint(val).astype(numpy.int32)
def _packed_layer_u32_to_pairs(values: NDArray[numpy.unsignedinteger[Any]]) -> NDArray[numpy.int16]:
layer = (values >> numpy.uint32(16)).astype(numpy.uint16).view(numpy.int16)
dtype = (values & numpy.uint32(0xffff)).astype(numpy.uint16).view(numpy.int16)
return numpy.stack((layer, dtype), axis=-1)
def _packed_counts_u32_to_pairs(values: NDArray[numpy.unsignedinteger[Any]]) -> NDArray[numpy.int64]:
a_count = (values >> numpy.uint32(16)).astype(numpy.uint16).astype(numpy.int64)
b_count = (values & numpy.uint32(0xffff)).astype(numpy.uint16).astype(numpy.int64)
return numpy.stack((a_count, b_count), axis=-1)
def _packed_xy_u64_to_pairs(values: NDArray[numpy.unsignedinteger[Any]]) -> NDArray[numpy.int32]:
xx = (values >> numpy.uint64(32)).astype(numpy.uint32).view(numpy.int32)
yy = (values & numpy.uint64(0xffff_ffff)).astype(numpy.uint32).view(numpy.int32)
return numpy.stack((xx, yy), axis=-1)
def _local_library_filename() -> str:
if sys.platform.startswith('linux'):
return 'libklamath_rs_ext.so'
if sys.platform == 'darwin':
return 'libklamath_rs_ext.dylib'
if sys.platform == 'win32':
return 'klamath_rs_ext.dll'
raise OSError(f'Unsupported platform for klamath_rs_ext: {sys.platform!r}')
def _installed_library_candidates() -> list[pathlib.Path]:
candidates: list[pathlib.Path] = []
try:
spec = importlib.util.find_spec('klamath_rs_ext.klamath_rs_ext')
except ModuleNotFoundError:
spec = None
if spec is not None and spec.origin is not None:
candidates.append(pathlib.Path(spec.origin))
try:
pkg_spec = importlib.util.find_spec('klamath_rs_ext')
except ModuleNotFoundError:
pkg_spec = None
if pkg_spec is not None and pkg_spec.submodule_search_locations is not None:
for location in pkg_spec.submodule_search_locations:
pkg_dir = pathlib.Path(location)
for suffix in EXTENSION_SUFFIXES:
candidates.extend(sorted(pkg_dir.glob(f'klamath_rs_ext*{suffix}')))
return candidates
def _repo_library_candidates() -> list[pathlib.Path]:
    """Candidate library paths inside a source checkout (release build first, then debug)."""
    target_dir = pathlib.Path(__file__).resolve().parents[2] / 'klamath-rs' / 'target'
    filename = _local_library_filename()
    return [target_dir / profile / filename for profile in ('release', 'debug')]
def find_klamath_rs_library() -> pathlib.Path | None:
env_path = os.environ.get('KLAMATH_RS_EXT_LIB')
if env_path:
candidate = pathlib.Path(env_path).expanduser()
if candidate.exists():
return candidate.resolve()
seen: set[pathlib.Path] = set()
for candidate in _installed_library_candidates() + _repo_library_candidates():
resolved = candidate.expanduser()
if resolved in seen:
continue
seen.add(resolved)
if resolved.exists():
return resolved.resolve()
return None
def is_available() -> bool:
    """Report whether the klamath_rs_ext shared library can be located on this system."""
    found = find_klamath_rs_library()
    return found is not None
def _get_clib() -> Any:
    """Return the klamath_rs_ext FFI handle, dlopen()-ing it on first use.

    The loaded handle is cached in the module-level `clib` global.

    Raises:
        ImportError: if the shared library cannot be located.
    """
    global clib
    if clib is not None:
        return clib
    lib_path = find_klamath_rs_library()
    if lib_path is None:
        raise ImportError(
            'Could not locate klamath_rs_ext shared library. '
            'Build klamath-rs with `cargo build --release --manifest-path klamath-rs/Cargo.toml` '
            'or set KLAMATH_RS_EXT_LIB to the built library path.'
            )
    clib = ffi.dlopen(str(lib_path))
    return clib
def _read_annotations(
        prop_offs: NDArray[numpy.integer[Any]],
        prop_key: NDArray[numpy.integer[Any]],
        prop_val: list[str],
        ee: int,
        ) -> annotations_t:
    """Assemble the annotations dict for element `ee`.

    The element's properties live in prop_key/prop_val between
    prop_offs[ee] and prop_offs[ee + 1]; an empty range yields None.
    """
    start, stop = prop_offs[ee], prop_offs[ee + 1]
    if start >= stop:
        return None
    result: dict[str, list[int | float | str]] = {}
    for off in range(start, stop):
        result[str(prop_key[off])] = [prop_val[off]]
    return result
def _read_to_arrow(
filename: str | pathlib.Path,
*args,
**kwargs,
) -> pyarrow.Array:
path = pathlib.Path(filename)
path.resolve()
path = pathlib.Path(filename).expanduser().resolve()
ptr_array = ffi.new('struct ArrowArray[]', 1)
ptr_schema = ffi.new('struct ArrowSchema[]', 1)
clib.read_path(str(path).encode(), ptr_array, ptr_schema)
_get_clib().read_path(str(path).encode(), ptr_array, ptr_schema)
iptr_schema = int(ffi.cast('uintptr_t', ptr_schema))
iptr_array = int(ffi.cast('uintptr_t', ptr_array))
@ -132,7 +244,7 @@ def read_arrow(
"""
library_info = _read_header(libarr)
layer_names_np = libarr['layers'].values.to_numpy().view('i2').reshape((-1, 2))
layer_names_np = _packed_layer_u32_to_pairs(libarr['layers'].values.to_numpy())
layer_tups = [tuple(pair) for pair in layer_names_np]
cell_ids = libarr['cells'].values.field('id').to_numpy()
@ -155,14 +267,14 @@ def read_arrow(
refs = dict(
offsets = rf.offsets.to_numpy(),
targets = rf.values.field('target').to_numpy(),
xy = rf.values.field('xy').to_numpy().view('i4').reshape((-1, 2)),
xy = _packed_xy_u64_to_pairs(rf.values.field('xy').to_numpy()),
invert_y = rf.values.field('invert_y').fill_null(False).to_numpy(zero_copy_only=False),
angle_rad = numpy.rad2deg(rf.values.field('angle_deg').fill_null(0).to_numpy()),
angle_rad = numpy.deg2rad(rf.values.field('angle_deg').fill_null(0).to_numpy()),
scale = rf.values.field('mag').fill_null(1).to_numpy(),
rep_valid = rf.values.field('repetition').is_valid().to_numpy(zero_copy_only=False),
rep_xy0 = rf.values.field('repetition').field('xy0').fill_null(0).to_numpy().view('i4').reshape((-1, 2)),
rep_xy1 = rf.values.field('repetition').field('xy1').fill_null(0).to_numpy().view('i4').reshape((-1, 2)),
rep_counts = rf.values.field('repetition').field('counts').fill_null(0).to_numpy().view('i2').reshape((-1, 2)),
rep_xy0 = _packed_xy_u64_to_pairs(rf.values.field('repetition').field('xy0').fill_null(0).to_numpy()),
rep_xy1 = _packed_xy_u64_to_pairs(rf.values.field('repetition').field('xy1').fill_null(0).to_numpy()),
rep_counts = _packed_counts_u32_to_pairs(rf.values.field('repetition').field('counts').fill_null(0).to_numpy()),
prop_off = rf.values.field('properties').offsets.to_numpy(),
prop_key = rf.values.field('properties').values.field('key').to_numpy(),
prop_val = rf.values.field('properties').values.field('value').to_pylist(),
@ -172,7 +284,7 @@ def read_arrow(
texts = dict(
offsets = txt.offsets.to_numpy(),
layer_inds = txt.values.field('layer').to_numpy(),
xy = txt.values.field('xy').to_numpy().view('i4').reshape((-1, 2)),
xy = _packed_xy_u64_to_pairs(txt.values.field('xy').to_numpy()),
string = txt.values.field('string').to_pylist(),
prop_off = txt.values.field('properties').offsets.to_numpy(),
prop_key = txt.values.field('properties').values.field('key').to_numpy(),
@ -222,9 +334,9 @@ def _read_header(libarr: pyarrow.Array) -> dict[str, Any]:
Read the file header and create the library_info dict.
"""
library_info = dict(
name = libarr['lib_name'],
meters_per_unit = libarr['meters_per_db_unit'],
logical_units_per_unit = libarr['user_units_per_db_unit'],
name = libarr['lib_name'].as_py(),
meters_per_unit = libarr['meters_per_db_unit'].as_py(),
logical_units_per_unit = libarr['user_units_per_db_unit'].as_py(),
)
return library_info
@ -245,6 +357,8 @@ def _grefs_to_mrefs(
elem_count = elem_off[cc + 1] - elem_off[cc]
elem_slc = slice(elem_off[cc], elem_off[cc] + elem_count + 1) # +1 to capture ending location for last elem
prop_offs = elem['prop_off'][elem_slc] # which props belong to each element
elem_targets = targets[elem_slc][:elem_count]
elem_xy = xy[elem_slc][:elem_count]
elem_invert_y = elem['invert_y'][elem_slc][:elem_count]
elem_angle_rad = elem['angle_rad'][elem_slc][:elem_count]
elem_scale = elem['scale'][elem_slc][:elem_count]
@ -255,8 +369,8 @@ def _grefs_to_mrefs(
for ee in range(elem_count):
target = cell_names[targets[ee]]
offset = xy[ee]
target = cell_names[elem_targets[ee]]
offset = elem_xy[ee]
mirr = elem_invert_y[ee]
rot = elem_angle_rad[ee]
mag = elem_scale[ee]
@ -268,11 +382,7 @@ def _grefs_to_mrefs(
a_count, b_count = elem_rep_counts[ee]
rep = Grid(a_vector=a_vector, b_vector=b_vector, a_count=a_count, b_count=b_count)
annotations: None | dict[str, list[int | float | str]] = None
prop_ii, prop_ff = prop_offs[ee], prop_offs[ee + 1]
if prop_ii < prop_ff:
annotations = {str(prop_key[off]): [prop_val[off]] for off in range(prop_ii, prop_ff)}
annotations = _read_annotations(prop_offs, prop_key, prop_val, ee)
ref = Ref(offset=offset, mirrored=mirr, rotation=rot, scale=mag, repetition=rep, annotations=annotations)
pat.refs[target].append(ref)
@ -293,19 +403,16 @@ def _texts_to_labels(
elem_count = elem_off[cc + 1] - elem_off[cc]
elem_slc = slice(elem_off[cc], elem_off[cc] + elem_count + 1) # +1 to capture ending location for last elem
prop_offs = elem['prop_off'][elem_slc] # which props belong to each element
elem_xy = xy[elem_slc][:elem_count]
elem_layer_inds = layer_inds[elem_slc][:elem_count]
elem_strings = elem['string'][elem_slc][:elem_count]
for ee in range(elem_count):
layer = layer_tups[elem_layer_inds[ee]]
offset = xy[ee]
offset = elem_xy[ee]
string = elem_strings[ee]
annotations: None | dict[str, list[int | float | str]] = None
prop_ii, prop_ff = prop_offs[ee], prop_offs[ee + 1]
if prop_ii < prop_ff:
annotations = {str(prop_key[off]): [prop_val[off]] for off in range(prop_ii, prop_ff)}
annotations = _read_annotations(prop_offs, prop_key, prop_val, ee)
mlabel = Label(string=string, offset=offset, annotations=annotations)
pat.labels[layer].append(mlabel)
@ -345,11 +452,7 @@ def _gpaths_to_mpaths(
else:
cap_extensions = None
annotations: None | dict[str, list[int | float | str]] = None
prop_ii, prop_ff = prop_offs[ee], prop_offs[ee + 1]
if prop_ii < prop_ff:
annotations = {str(prop_key[off]): [prop_val[off]] for off in range(prop_ii, prop_ff)}
annotations = _read_annotations(prop_offs, prop_key, prop_val, ee)
path = Path(vertices=vertices, offset=zeros[ee], annotations=annotations, raw=raw_mode,
width=width, cap=cap,cap_extensions=cap_extensions)
pat.shapes[layer].append(path)
@ -406,11 +509,7 @@ def _boundaries_to_polygons(
layer = layer_tups[elem_layer_inds[ee]]
vertices = xy_val[xy_offs[ee]:xy_offs[ee + 1] - 1] # -1 to drop closing point
annotations: None | dict[str, list[int | float | str]] = None
prop_ii, prop_ff = prop_offs[ee], prop_offs[ee + 1]
if prop_ii < prop_ff:
annotations = {str(prop_key[off]): prop_val[off] for off in range(prop_ii, prop_ff)}
annotations = _read_annotations(prop_offs, prop_key, prop_val, ee)
poly = Polygon(vertices=vertices, offset=zeros[ee], annotations=annotations, raw=raw_mode)
pat.shapes[layer].append(poly)

View file

@ -0,0 +1,180 @@
from pathlib import Path
import numpy
import pytest
pytest.importorskip('pyarrow')
from ..library import Library
from ..pattern import Pattern
from ..repetition import Grid
from ..shapes import Path as MPath
from ..file import gdsii, gdsii_arrow
from ..file.gdsii_perf import write_fixture
# Skip this entire test module when the compiled klamath_rs_ext shared library
# cannot be located (e.g. the cargo build artifact is absent on this machine).
if not gdsii_arrow.is_available():
    pytest.skip('klamath_rs_ext shared library is not available', allow_module_level=True)
def _annotations_key(annotations: dict[str, list[object]] | None) -> tuple[tuple[str, tuple[object, ...]], ...] | None:
if annotations is None:
return None
return tuple(sorted((key, tuple(values)) for key, values in annotations.items()))
def _coord_key(values: object) -> tuple[int, ...] | tuple[tuple[int, int], ...]:
arr = numpy.rint(numpy.asarray(values, dtype=float)).astype(int)
if arr.ndim == 1:
return tuple(arr.tolist())
return tuple(tuple(row.tolist()) for row in arr)
def _shape_key(shape: object, layer: tuple[int, int]) -> list[tuple[object, ...]]:
    """Canonical comparison keys for a shape: one key for a path, one per polygon otherwise."""
    if isinstance(shape, MPath):
        if shape.cap_extensions is None:
            cap_extensions = None
        else:
            cap_extensions = _coord_key(shape.cap_extensions)
        path_key = (
            'path',
            layer,
            _coord_key(shape.vertices),
            _coord_key(shape.offset),
            int(round(float(shape.width))),
            shape.cap.name,
            cap_extensions,
            _annotations_key(shape.annotations),
            )
        return [path_key]
    # Any non-path shape is compared via its polygonal decomposition.
    return [
        (
            'polygon',
            layer,
            _coord_key(poly.vertices),
            _coord_key(poly.offset),
            _annotations_key(poly.annotations),
        )
        for poly in shape.to_polygons()
        ]
def _ref_key(target: str, ref: object) -> tuple[object, ...]:
    """Canonical comparison key for a reference to cell `target`."""
    rep = ref.repetition
    if rep is None:
        rep_key = None
    else:
        rep_key = (
            _coord_key(rep.a_vector),
            int(rep.a_count),
            _coord_key(rep.b_vector),
            int(rep.b_count),
            )
    return (
        target,
        _coord_key(ref.offset),
        round(float(ref.rotation), 8),   # rounded to tolerate float noise
        round(float(ref.scale), 8),
        bool(ref.mirrored),
        rep_key,
        _annotations_key(ref.annotations),
        )
def _label_key(layer: tuple[int, int], label: object) -> tuple[object, ...]:
    """Canonical comparison key for a text label on `layer`."""
    parts = [
        layer,
        label.string,
        _coord_key(label.offset),
        _annotations_key(label.annotations),
        ]
    return tuple(parts)
def _pattern_summary(pattern: Pattern) -> dict[str, object]:
    """Sorted, order-independent summary of a pattern's shapes, refs, and labels."""
    shape_keys = [
        key
        for layer, shapes in pattern.shapes.items()
        for shape in shapes
        for key in _shape_key(shape, layer)
        ]
    ref_keys = []
    for target, refs in pattern.refs.items():
        for ref in refs:
            ref_keys.append(_ref_key(target, ref))
    label_keys = []
    for layer, labels in pattern.labels.items():
        for label in labels:
            label_keys.append(_label_key(layer, label))
    return dict(
        shapes=sorted(shape_keys),
        refs=sorted(ref_keys),
        labels=sorted(label_keys),
        )
def _library_summary(lib: Library) -> dict[str, dict[str, object]]:
    """Per-cell pattern summaries for every pattern in `lib`."""
    summaries: dict[str, dict[str, object]] = {}
    for name, pattern in lib.items():
        summaries[name] = _pattern_summary(pattern)
    return summaries
def _make_arrow_test_library() -> Library:
    """Build a small library exercising polygons, paths, labels, refs, and repetitions.

    The hierarchy is top -> {child, sibling} -> leaf, with annotations on most
    elements so annotation round-tripping is covered as well.
    """
    lib = Library()

    # 'leaf': two polygons (one annotated) and a label.
    leaf = Pattern()
    leaf.polygon((1, 0), vertices=[[0, 0], [10, 0], [10, 10], [0, 10]], annotations={'1': ['leaf-poly']})
    leaf.polygon((1, 0), vertices=[[20, 0], [30, 0], [30, 10], [20, 10]])
    leaf.label((10, 0), string='LEAF', offset=(3, 4), annotations={'10': ['leaf-label']})
    lib['leaf'] = leaf

    # 'child': a custom-capped path, a label, and a rotated/mirrored/scaled ref to 'leaf'.
    child = Pattern()
    child.path(
        (2, 0),
        vertices=[[0, 0], [15, 5], [30, 5]],
        width=6,
        cap=MPath.Cap.SquareCustom,
        cap_extensions=(2, 4),
        annotations={'2': ['child-path']},
        )
    child.label((11, 0), string='CHILD', offset=(7, 8), annotations={'11': ['child-label']})
    child.ref('leaf', offset=(100, 200), rotation=numpy.pi / 2, mirrored=True, scale=1.25, annotations={'12': ['child-ref']})
    lib['child'] = child

    # 'sibling': a polygon, a label, and an arrayed (Grid repetition) ref to 'leaf'.
    sibling = Pattern()
    sibling.polygon((3, 0), vertices=[[0, 0], [5, 0], [5, 6], [0, 6]])
    sibling.label((12, 0), string='SIB', offset=(1, 2), annotations={'13': ['sib-label']})
    sibling.ref(
        'leaf',
        offset=(-50, 60),
        repetition=Grid(a_vector=(20, 0), a_count=3, b_vector=(0, 30), b_count=2),
        annotations={'14': ['sib-ref']},
        )
    lib['sibling'] = sibling

    # 'top': refs into both subtrees plus its own label.
    top = Pattern()
    top.ref('child', offset=(500, 600), annotations={'15': ['top-child-ref']})
    top.ref('sibling', offset=(-100, 50), rotation=numpy.pi, annotations={'16': ['top-sibling-ref']})
    top.label((13, 0), string='TOP', offset=(0, 0), annotations={'17': ['top-label']})
    lib['top'] = top
    return lib
def test_gdsii_arrow_matches_gdsii_readfile(tmp_path: Path) -> None:
    """The arrow-based reader must agree with the canonical gdsii reader."""
    gds_file = tmp_path / 'arrow_roundtrip.gds'
    gdsii.writefile(_make_arrow_test_library(), gds_file, meters_per_unit=1e-9)

    expected_lib, expected_info = gdsii.readfile(gds_file)
    actual_lib, actual_info = gdsii_arrow.readfile(gds_file)

    assert expected_info == actual_info
    assert _library_summary(expected_lib) == _library_summary(actual_lib)
def test_gdsii_arrow_reads_small_perf_fixture(tmp_path: Path) -> None:
    """Smoke-test the arrow reader against a down-scaled perf fixture."""
    gds_file = tmp_path / 'many_cells_smoke.gds'
    manifest = write_fixture(gds_file, preset='many_cells', scale=0.001)

    lib, info = gdsii_arrow.readfile(gds_file)

    assert info['name'] == manifest.library_name
    assert len(lib) == manifest.cells
    assert 'TOP' in lib
    total_top_refs = sum(len(refs) for refs in lib['TOP'].refs.values())
    assert total_top_refs > 0