Compare commits


9 Commits

Author SHA1 Message Date
7336545f07 [gdsii_arrow] add some TODO notes 2025-04-22 20:22:01 -07:00
4e40e3f829 [gdsii_arrow] use direct access for all element types 2025-04-22 20:20:46 -07:00
79f2088180 [utils.curves] ignore re-import of trapezoid 2025-04-22 20:19:59 -07:00
e89d912ce8 allow annotations to be None
breaking change, but properties are seldom used by anyone afaik
2025-04-21 20:26:34 -07:00
76511b95e6 gdsii_arrow wip 2025-04-21 19:07:26 -07:00
88bd5e897e split out _read_to_arrow
for ease of debugging
2025-04-21 19:07:26 -07:00
dc89491694 actually make use of raw mode 2025-04-21 19:07:26 -07:00
de9714041f add gdsii_arrow 2025-04-21 19:07:26 -07:00
35e28acb89 [polygon] Only call rotate if necessary 2025-04-21 19:07:21 -07:00
7 changed files with 464 additions and 11 deletions

masque/file/gdsii.py
View File

@@ -21,6 +21,7 @@ Notes:
 """
 from typing import IO, cast, Any
 from collections.abc import Iterable, Mapping, Callable
+from types import MappingProxyType
 import io
 import mmap
 import logging
@@ -52,6 +53,8 @@ path_cap_map = {
     4: Path.Cap.SquareCustom,
     }

+RO_EMPTY_DICT: Mapping[int, bytes] = MappingProxyType({})
+

 def rint_cast(val: ArrayLike) -> NDArray[numpy.int32]:
     return numpy.rint(val).astype(numpy.int32)
@@ -399,11 +402,15 @@ def _mrefs_to_grefs(refs: dict[str | None, list[Ref]]) -> list[klamath.library.R
     return grefs


-def _properties_to_annotations(properties: dict[int, bytes]) -> annotations_t:
+def _properties_to_annotations(properties: Mapping[int, bytes]) -> annotations_t:
+    if not properties:
+        return None
     return {str(k): [v.decode()] for k, v in properties.items()}


-def _annotations_to_properties(annotations: annotations_t, max_len: int = 126) -> dict[int, bytes]:
+def _annotations_to_properties(annotations: annotations_t, max_len: int = 126) -> Mapping[int, bytes]:
+    if annotations is None:
+        return RO_EMPTY_DICT
     cum_len = 0
     props = {}
     for key, vals in annotations.items():
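
The net effect: empty GDS property lists now round-trip as `None` instead of `{}`. A minimal sketch of the new behavior (these helpers are module-private, so the import is for illustration only):

    from masque.file.gdsii import _properties_to_annotations, _annotations_to_properties

    assert _properties_to_annotations({}) is None                     # no properties -> None
    assert _properties_to_annotations({2: b'foo'}) == {'2': ['foo']}
    assert _annotations_to_properties(None) == {}                     # read-only RO_EMPTY_DICT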

440
masque/file/gdsii_arrow.py Normal file
View File

@@ -0,0 +1,440 @@
"""
GDSII file format readers and writers using the `TODO` library.
Note that GDSII references follow the same convention as `masque`,
with this order of operations:
1. Mirroring
2. Rotation
3. Scaling
4. Offset and array expansion (no mirroring/rotation/scaling applied to offsets)
Scaling, rotation, and mirroring apply to individual instances, not grid
vectors or offsets.
Notes:
* absolute positioning is not supported
* PLEX is not supported
* ELFLAGS are not supported
* GDS does not support library- or structure-level annotations
* GDS creation/modification/access times are set to 1900-01-01 for reproducibility.
* Gzip modification time is set to 0 (start of current epoch, usually 1970-01-01)
TODO writing
TODO warn on boxes, nodes
"""
from typing import IO, cast, Any
from collections.abc import Iterable, Mapping, Callable
import io
import mmap
import logging
import pathlib
import gzip
import string
from pprint import pformat
import numpy
from numpy.typing import ArrayLike, NDArray
from numpy.testing import assert_equal
import pyarrow
from pyarrow.cffi import ffi
from .utils import is_gzipped, tmpfile
from .. import Pattern, Ref, PatternError, LibraryError, Label, Shape
from ..shapes import Polygon, Path
from ..repetition import Grid
from ..utils import layer_t, annotations_t
from ..library import LazyLibrary, Library, ILibrary, ILibraryView
logger = logging.getLogger(__name__)
clib = ffi.dlopen('/home/jan/projects/klamath-rs/target/release/libklamath_rs_ext.so')
ffi.cdef('void read_path(char* path, struct ArrowArray* array, struct ArrowSchema* schema);')
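
# `read_path` is implemented in the Rust `klamath_rs_ext` library: it parses the GDS file
# and fills the two caller-allocated structs with the parsed data, following the Arrow C
# Data Interface (https://arrow.apache.org/docs/format/CDataInterface.html).
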
path_cap_map = {
    0: Path.Cap.Flush,
    1: Path.Cap.Circle,
    2: Path.Cap.Square,
    4: Path.Cap.SquareCustom,
    }


def rint_cast(val: ArrayLike) -> NDArray[numpy.int32]:
    return numpy.rint(val).astype(numpy.int32)


def _read_to_arrow(
        filename: str | pathlib.Path,
        *args,
        **kwargs,
        ) -> pyarrow.Array:
    path = pathlib.Path(filename)
    path = path.resolve()      # Path.resolve() returns a new path; it does not modify in-place
    ptr_array = ffi.new('struct ArrowArray[]', 1)
    ptr_schema = ffi.new('struct ArrowSchema[]', 1)
    clib.read_path(str(path).encode(), ptr_array, ptr_schema)

    iptr_schema = int(ffi.cast('uintptr_t', ptr_schema))
    iptr_array = int(ffi.cast('uintptr_t', ptr_array))
    arrow_arr = pyarrow.Array._import_from_c(iptr_array, iptr_schema)

    return arrow_arr
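
# Note: `pyarrow.Array._import_from_c` is a private pyarrow API. Importing moves ownership
# of the C structs to the returned Python object (pyarrow will invoke their `release`
# callbacks), so the Rust side must not free them again.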


def readfile(
        filename: str | pathlib.Path,
        *args,
        **kwargs,
        ) -> tuple[Library, dict[str, Any]]:
    """
    Wrapper for `read_arrow()` that takes a filename or path instead of a stream.

    Will automatically decompress gzipped files.

    Args:
        filename: Filename to read from.
        *args: passed to `read_arrow()`
        **kwargs: passed to `read_arrow()`
    """
    arrow_arr = _read_to_arrow(filename)
    assert len(arrow_arr) == 1
    results = read_arrow(arrow_arr[0], *args, **kwargs)
    return results


def read_arrow(
        libarr: pyarrow.Array,
        raw_mode: bool = True,
        ) -> tuple[Library, dict[str, Any]]:
    """
    # TODO check GDSII file for cycles!
    Read a gdsii file and translate it into a dict of Pattern objects. GDSII structures are
    translated into Pattern objects; boundaries are translated into polygons, and srefs and arefs
    are translated into Ref objects.

    Additional library info is returned in a dict, containing:
      'name': name of the library
      'meters_per_unit': number of meters per database unit (all values are in database units)
      'logical_units_per_unit': number of "logical" units displayed by layout tools (typically
            microns) per database unit

    Args:
        libarr: Arrow array (as returned by `_read_to_arrow()`) containing the library data.
        raw_mode: If True, constructs shapes in raw mode, bypassing most data validation. Default True.

    Returns:
        - dict of pattern_name:Patterns generated from GDSII structures
        - dict of GDSII library info
    """
    library_info = _read_header(libarr)

    layer_names_np = libarr['layers'].values.to_numpy().view('i2').reshape((-1, 2))
    layer_tups = [tuple(pair) for pair in layer_names_np]

    cell_ids = libarr['cells'].values.field('id').to_numpy()
    cell_names = libarr['cell_names'].as_py()

    def get_geom(libarr: pyarrow.Array, geom_type: str) -> dict[str, Any]:
        el = libarr['cells'].values.field(geom_type)
        elem = dict(
            offsets = el.offsets.to_numpy(),
            xy_arr = el.values.field('xy').values.to_numpy().reshape((-1, 2)),
            xy_off = el.values.field('xy').offsets.to_numpy() // 2,
            layer_inds = el.values.field('layer').to_numpy(),
            prop_off = el.values.field('properties').offsets.to_numpy(),
            prop_key = el.values.field('properties').values.field('key').to_numpy(),
            prop_val = el.values.field('properties').values.field('value').to_pylist(),
            )
        return elem

    rf = libarr['cells'].values.field('refs')
    refs = dict(
        offsets = rf.offsets.to_numpy(),
        targets = rf.values.field('target').to_numpy(),
        xy = rf.values.field('xy').to_numpy().view('i4').reshape((-1, 2)),
        invert_y = rf.values.field('invert_y').fill_null(False).to_numpy(zero_copy_only=False),
        angle_rad = numpy.deg2rad(rf.values.field('angle_deg').fill_null(0).to_numpy()),   # GDS stores degrees; masque uses radians
        scale = rf.values.field('mag').fill_null(1).to_numpy(),
        rep_valid = rf.values.field('repetition').is_valid().to_numpy(zero_copy_only=False),
        rep_xy0 = rf.values.field('repetition').field('xy0').fill_null(0).to_numpy().view('i4').reshape((-1, 2)),
        rep_xy1 = rf.values.field('repetition').field('xy1').fill_null(0).to_numpy().view('i4').reshape((-1, 2)),
        rep_counts = rf.values.field('repetition').field('counts').fill_null(0).to_numpy().view('i2').reshape((-1, 2)),
        prop_off = rf.values.field('properties').offsets.to_numpy(),
        prop_key = rf.values.field('properties').values.field('key').to_numpy(),
        prop_val = rf.values.field('properties').values.field('value').to_pylist(),
        )

    txt = libarr['cells'].values.field('texts')
    texts = dict(
        offsets = txt.offsets.to_numpy(),
        layer_inds = txt.values.field('layer').to_numpy(),
        xy = txt.values.field('xy').to_numpy().view('i4').reshape((-1, 2)),
        string = txt.values.field('string').to_pylist(),
        prop_off = txt.values.field('properties').offsets.to_numpy(),
        prop_key = txt.values.field('properties').values.field('key').to_numpy(),
        prop_val = txt.values.field('properties').values.field('value').to_pylist(),
        )

    elements = dict(
        boundaries = get_geom(libarr, 'boundaries'),
        paths = get_geom(libarr, 'paths'),
        boxes = get_geom(libarr, 'boxes'),
        nodes = get_geom(libarr, 'nodes'),
        texts = texts,
        refs = refs,
        )

    paths = libarr['cells'].values.field('paths')
    elements['paths'].update(dict(
        width = paths.values.field('width').to_numpy(),
        path_type = paths.values.field('path_type').to_numpy(),
        extensions = numpy.stack((
            paths.values.field('extension_start').to_numpy(zero_copy_only=False),
            paths.values.field('extension_end').to_numpy(zero_copy_only=False),
            ), axis=-1),
        ))

    global_args = dict(
        cell_names = cell_names,
        layer_tups = layer_tups,
        raw_mode = raw_mode,
        )

    mlib = Library()
    for cc, cell in enumerate(libarr['cells']):
        name = cell_names[cell_ids[cc]]
        pat = read_cell(cc, cell, libarr['cell_names'], global_args=global_args, elements=elements)
        mlib[name] = pat

    return mlib, library_info
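
# Example usage (a sketch; assumes `libklamath_rs_ext.so` has been built and a `test.gds`
# file exists):
#
#     mlib, library_info = readfile('test.gds')
#     print(library_info['name'], library_info['meters_per_unit'])
#     for name, pat in mlib.items():
#         print(name, len(pat.shapes), len(pat.refs))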


def _read_header(libarr: pyarrow.Array) -> dict[str, Any]:
    """
    Read the file header and create the library_info dict.
    """
    library_info = dict(
        name = libarr['lib_name'],
        meters_per_unit = libarr['meters_per_db_unit'],
        logical_units_per_unit = libarr['user_units_per_db_unit'],
        )
    return library_info


def read_cell(
        cc: int,
        cellarr: pyarrow.Array,
        cell_names: pyarrow.Array,
        elements: dict[str, Any],
        global_args: dict[str, Any],
        ) -> Pattern:
    """
    TODO
    Read elements from a GDS structure and build a Pattern from them.

    Args:
        cc: Index of the cell to read.
        cellarr: Arrow array containing the cell's data.
        cell_names: Array of cell names (used to resolve ref targets).
        elements: Pre-extracted per-element-type data (see `read_arrow()`).
        raw_mode: If True, bypass per-shape data validation. Default True.

    Returns:
        A pattern containing the elements that were read.
    """
    pat = Pattern()
    _boundaries_to_polygons(pat, global_args, elements['boundaries'], cc)
    _gpaths_to_mpaths(pat, global_args, elements['paths'], cc)
    _grefs_to_mrefs(pat, global_args, elements['refs'], cc)
    _texts_to_labels(pat, global_args, elements['texts'], cc)
    return pat


def _grefs_to_mrefs(
        pat: Pattern,
        global_args: dict[str, Any],
        elem: dict[str, Any],
        cc: int,
        ) -> None:
    cell_names = global_args['cell_names']
    elem_off = elem['offsets']       # which elements belong to each cell
    xy = elem['xy']
    prop_key = elem['prop_key']
    prop_val = elem['prop_val']
    targets = elem['targets']
    rep_valid = elem['rep_valid']

    elem_count = elem_off[cc + 1] - elem_off[cc]
    elem_slc = slice(elem_off[cc], elem_off[cc] + elem_count + 1)    # +1 to capture ending location for last elem
    prop_offs = elem['prop_off'][elem_slc]        # which props belong to each element

    for ee in range(elem_count):
        elem_ind = elem_off[cc] + ee      # index into the flat (all-cells) element arrays
        target = cell_names[targets[elem_ind]]
        offset = xy[elem_ind]
        mirr = elem['invert_y'][elem_ind]
        rot = elem['angle_rad'][elem_ind]
        mag = elem['scale'][elem_ind]

        rep: None | Grid = None
        if rep_valid[elem_ind]:
            a_vector = elem['rep_xy0'][elem_ind]
            b_vector = elem['rep_xy1'][elem_ind]
            a_count, b_count = elem['rep_counts'][elem_ind]
            rep = Grid(a_vector=a_vector, b_vector=b_vector, a_count=a_count, b_count=b_count)

        annotations: None | dict[int, str] = None
        prop_ii, prop_ff = prop_offs[ee], prop_offs[ee + 1]
        if prop_ii < prop_ff:
            annotations = {prop_key[off]: prop_val[off] for off in range(prop_ii, prop_ff)}

        ref = Ref(offset=offset, mirrored=mirr, rotation=rot, scale=mag, repetition=rep, annotations=annotations)
        pat.refs[target].append(ref)


def _texts_to_labels(
        pat: Pattern,
        global_args: dict[str, Any],
        elem: dict[str, Any],
        cc: int,
        ) -> None:
    elem_off = elem['offsets']       # which elements belong to each cell
    xy = elem['xy']
    layer_tups = global_args['layer_tups']
    layer_inds = elem['layer_inds']
    prop_key = elem['prop_key']
    prop_val = elem['prop_val']

    elem_count = elem_off[cc + 1] - elem_off[cc]
    elem_slc = slice(elem_off[cc], elem_off[cc] + elem_count + 1)    # +1 to capture ending location for last elem
    prop_offs = elem['prop_off'][elem_slc]        # which props belong to each element

    for ee in range(elem_count):
        elem_ind = elem_off[cc] + ee      # index into the flat (all-cells) element arrays
        layer = layer_tups[layer_inds[elem_ind]]
        offset = xy[elem_ind]
        string = elem['string'][elem_ind]

        annotations: None | dict[int, str] = None
        prop_ii, prop_ff = prop_offs[ee], prop_offs[ee + 1]
        if prop_ii < prop_ff:
            annotations = {prop_key[off]: prop_val[off] for off in range(prop_ii, prop_ff)}

        mlabel = Label(string=string, offset=offset, annotations=annotations)
        pat.labels[layer].append(mlabel)


def _gpaths_to_mpaths(
        pat: Pattern,
        global_args: dict[str, Any],
        elem: dict[str, Any],
        cc: int,
        ) -> None:
    elem_off = elem['offsets']       # which elements belong to each cell
    xy_val = elem['xy_arr']
    layer_tups = global_args['layer_tups']
    layer_inds = elem['layer_inds']
    prop_key = elem['prop_key']
    prop_val = elem['prop_val']

    elem_count = elem_off[cc + 1] - elem_off[cc]
    elem_slc = slice(elem_off[cc], elem_off[cc] + elem_count + 1)    # +1 to capture ending location for last elem
    xy_offs = elem['xy_off'][elem_slc]            # which xy coords belong to each element
    prop_offs = elem['prop_off'][elem_slc]        # which props belong to each element

    zeros = numpy.zeros((elem_count, 2))
    raw_mode = global_args['raw_mode']
    for ee in range(elem_count):
        elem_ind = elem_off[cc] + ee      # index into the flat (all-cells) element arrays
        layer = layer_tups[layer_inds[elem_ind]]
        vertices = xy_val[xy_offs[ee]:xy_offs[ee + 1]]
        width = elem['width'][elem_ind]
        cap_int = elem['path_type'][elem_ind]
        cap = path_cap_map[cap_int]
        if cap_int == 4:
            cap_extensions = elem['extensions'][elem_ind]
        else:
            cap_extensions = None

        annotations: None | dict[int, str] = None
        prop_ii, prop_ff = prop_offs[ee], prop_offs[ee + 1]
        if prop_ii < prop_ff:
            annotations = {prop_key[off]: prop_val[off] for off in range(prop_ii, prop_ff)}

        path = Path(vertices=vertices, offset=zeros[ee], annotations=annotations, raw=raw_mode,
                    width=width, cap=cap, cap_extensions=cap_extensions)
        pat.shapes[layer].append(path)


def _boundaries_to_polygons(
        pat: Pattern,
        global_args: dict[str, Any],
        elem: dict[str, Any],
        cc: int,
        ) -> None:
    elem_off = elem['offsets']       # which elements belong to each cell
    xy_val = elem['xy_arr']
    layer_tups = global_args['layer_tups']
    layer_inds = elem['layer_inds']
    prop_key = elem['prop_key']
    prop_val = elem['prop_val']

    elem_count = elem_off[cc + 1] - elem_off[cc]
    elem_slc = slice(elem_off[cc], elem_off[cc] + elem_count + 1)    # +1 to capture ending location for last elem
    xy_offs = elem['xy_off'][elem_slc]            # which xy coords belong to each element
    prop_offs = elem['prop_off'][elem_slc]        # which props belong to each element

    zeros = numpy.zeros((elem_count, 2))
    raw_mode = global_args['raw_mode']
    for ee in range(elem_count):
        elem_ind = elem_off[cc] + ee      # index into the flat (all-cells) element arrays
        layer = layer_tups[layer_inds[elem_ind]]
        vertices = xy_val[xy_offs[ee]:xy_offs[ee + 1] - 1]    # -1 to drop closing point

        annotations: None | dict[int, str] = None
        prop_ii, prop_ff = prop_offs[ee], prop_offs[ee + 1]
        if prop_ii < prop_ff:
            annotations = {prop_key[off]: prop_val[off] for off in range(prop_ii, prop_ff)}

        poly = Polygon(vertices=vertices, offset=zeros[ee], annotations=annotations, raw=raw_mode)
        pat.shapes[layer].append(poly)


def _properties_to_annotations(properties: pyarrow.Array) -> annotations_t:
    return {prop['key'].as_py(): prop['value'].as_py() for prop in properties}


def check_valid_names(
        names: Iterable[str],
        max_length: int = 32,
        ) -> None:
    """
    Check all provided names to see if they're valid GDSII cell names.

    Args:
        names: Collection of names to check
        max_length: Max allowed length
    """
    allowed_chars = set(string.ascii_letters + string.digits + '_?$')

    bad_chars = [
        name for name in names
        if not set(name).issubset(allowed_chars)
        ]

    bad_lengths = [
        name for name in names
        if len(name) > max_length
        ]

    if bad_chars:
        logger.error('Names contain invalid characters:\n' + pformat(bad_chars))

    if bad_lengths:
        logger.error(f'Names too long (>{max_length}):\n' + pformat(bad_lengths))

    if bad_chars or bad_lengths:
        raise LibraryError('Library contains invalid names, see log above')

masque/file/oasis.py
View File

@@ -671,6 +671,8 @@ def repetition_masq2fata(
 def annotations_to_properties(annotations: annotations_t) -> list[fatrec.Property]:
     #TODO determine is_standard based on key?
+    if annotations is None:
+        return []
     properties = []
     for key, values in annotations.items():
         vals = [AString(v) if isinstance(v, str) else v

masque/pattern.py
View File

@@ -332,7 +332,7 @@ class Pattern(PortList, AnnotatableImpl, Mirrorable):
             ))
         self.ports = dict(sorted(self.ports.items()))
-        self.annotations = dict(sorted(self.annotations.items()))
+        self.annotations = dict(sorted(self.annotations.items())) if self.annotations is not None else None
         return self
@@ -354,10 +354,13 @@ class Pattern(PortList, AnnotatableImpl, Mirrorable):
         for layer, lseq in other_pattern.labels.items():
             self.labels[layer].extend(lseq)

-        annotation_conflicts = set(self.annotations.keys()) & set(other_pattern.annotations.keys())
-        if annotation_conflicts:
-            raise PatternError(f'Annotation keys overlap: {annotation_conflicts}')
-        self.annotations.update(other_pattern.annotations)
+        if other_pattern.annotations is not None:
+            if self.annotations is None:
+                self.annotations = {}
+            annotation_conflicts = set(self.annotations.keys()) & set(other_pattern.annotations.keys())
+            if annotation_conflicts:
+                raise PatternError(f'Annotation keys overlap: {annotation_conflicts}')
+            self.annotations.update(other_pattern.annotations)

         port_conflicts = set(self.ports.keys()) & set(other_pattern.ports.keys())
         if port_conflicts:
@@ -415,7 +418,7 @@ class Pattern(PortList, AnnotatableImpl, Mirrorable):
         elif default_keep:
             pat.refs = copy.copy(self.refs)

-        if annotations is not None:
+        if annotations is not None and self.annotations is not None:
             pat.annotations = {k: v for k, v in self.annotations.items() if annotations(k, v)}
         elif default_keep:
             pat.annotations = copy.copy(self.annotations)
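
A minimal sketch of the merge behavior after this change (assuming the second hunk above sits in `Pattern.append()`; the patterns here are hypothetical):

    from masque import Pattern

    aa = Pattern()
    aa.annotations = None                  # allowed now that annotations_t is Optional
    bb = Pattern()
    bb.annotations = {'owner': ['jan']}
    aa.append(bb)                          # self.annotations is created as {} before merging
    assert aa.annotations == {'owner': ['jan']}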

masque/shapes/polygon.py
View File

@@ -107,7 +107,8 @@ class Polygon(Shape):
             self.offset = offset
             self.repetition = repetition
             self.annotations = annotations if annotations is not None else {}
-        self.rotate(rotation)
+        if rotation:
+            self.rotate(rotation)

     def __deepcopy__(self, memo: dict | None = None) -> 'Polygon':
         memo = {} if memo is None else memo

masque/utils/curves.py
View File

@@ -5,7 +5,7 @@ from numpy import pi

 try:
     from numpy import trapezoid
 except ImportError:
-    from numpy import trapz as trapezoid
+    from numpy import trapz as trapezoid  # type:ignore

 def bezier(

masque/utils/types.py
View File

@@ -5,7 +5,7 @@ from typing import Protocol

 layer_t = int | tuple[int, int] | str
-annotations_t = dict[str, list[int | float | str]]
+annotations_t = dict[str, list[int | float | str]] | None


 class SupportsBool(Protocol):