diff --git a/examples/profile_gdsii_readers.py b/examples/profile_gdsii_readers.py index c77c589..129a391 100644 --- a/examples/profile_gdsii_readers.py +++ b/examples/profile_gdsii_readers.py @@ -5,17 +5,18 @@ import importlib import json import time from pathlib import Path +from typing import Any from masque import LibraryError -READERS = { - 'gdsii': ('masque.file.gdsii', 'readfile'), - 'gdsii_arrow': ('masque.file.gdsii_arrow', 'readfile'), +READERS: dict[str, tuple[str, tuple[str, ...]]] = { + 'gdsii': ('masque.file.gdsii', ('readfile',)), + 'gdsii_arrow': ('masque.file.gdsii_arrow', ('readfile', 'arrow_import', 'arrow_convert')), } -def _summarize(path: Path, elapsed_s: float, info: dict[str, object], lib: object) -> dict[str, object]: +def _summarize_library(path: Path, elapsed_s: float, info: dict[str, object], lib: object) -> dict[str, object]: assert hasattr(lib, '__len__') assert hasattr(lib, 'tops') tops = lib.tops() # type: ignore[no-any-return, attr-defined] @@ -34,12 +35,50 @@ def _summarize(path: Path, elapsed_s: float, info: dict[str, object], lib: objec } +def _summarize_arrow_import(path: Path, elapsed_s: float, arrow_arr: Any) -> dict[str, object]: + libarr = arrow_arr[0] + return { + 'path': str(path), + 'elapsed_s': elapsed_s, + 'arrow_rows': len(arrow_arr), + 'library_name': libarr['lib_name'].as_py(), + 'cell_count': len(libarr['cells']), + 'layer_count': len(libarr['layers']), + } + + +def _profile_stage(module: Any, stage: str, path: Path) -> dict[str, object]: + start = time.perf_counter() + + if stage == 'readfile': + lib, info = module.readfile(path) + elapsed_s = time.perf_counter() - start + return _summarize_library(path, elapsed_s, info, lib) + + if stage == 'arrow_import': + arrow_arr = module._read_to_arrow(path) + elapsed_s = time.perf_counter() - start + return _summarize_arrow_import(path, elapsed_s, arrow_arr) + + if stage == 'arrow_convert': + arrow_arr = module._read_to_arrow(path) + libarr = arrow_arr[0] + start = time.perf_counter() + lib, info = module.read_arrow(libarr) + elapsed_s = time.perf_counter() - start + return _summarize_library(path, elapsed_s, info, lib) + + raise ValueError(f'Unsupported stage {stage!r}') + + def build_arg_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser(description='Profile GDS readers with a stable end-to-end workload.') parser.add_argument('--reader', choices=sorted(READERS), required=True) + parser.add_argument('--stage', default='readfile') parser.add_argument('--path', type=Path, required=True) parser.add_argument('--warmup', type=int, default=1) parser.add_argument('--repeat', type=int, default=1) + parser.add_argument('--output-json', type=Path) return parser @@ -47,26 +86,32 @@ def main(argv: list[str] | None = None) -> int: parser = build_arg_parser() args = parser.parse_args(argv) - module_name, attr_name = READERS[args.reader] - readfile = getattr(importlib.import_module(module_name), attr_name) + module_name, stages = READERS[args.reader] + if args.stage not in stages: + parser.error(f'reader {args.reader!r} only supports stages: {", ".join(stages)}') + + module = importlib.import_module(module_name) path = args.path.expanduser().resolve() for _ in range(args.warmup): - readfile(path) + _profile_stage(module, args.stage, path) runs = [] for _ in range(args.repeat): - start = time.perf_counter() - lib, info = readfile(path) - elapsed_s = time.perf_counter() - start - runs.append(_summarize(path, elapsed_s, info, lib)) + runs.append(_profile_stage(module, args.stage, path)) - print(json.dumps({ + payload = { 'reader': args.reader, + 'stage': args.stage, 'warmup': args.warmup, 'repeat': args.repeat, 'runs': runs, - }, indent=2, sort_keys=True)) + } + rendered = json.dumps(payload, indent=2, sort_keys=True) + if args.output_json is not None: + args.output_json.parent.mkdir(parents=True, exist_ok=True) + args.output_json.write_text(rendered + '\n') + print(rendered) return 0 diff --git a/masque/file/gdsii_arrow.py b/masque/file/gdsii_arrow.py index 546d575..f6a7739 100644 --- a/masque/file/gdsii_arrow.py +++ b/masque/file/gdsii_arrow.py @@ -57,6 +57,8 @@ ffi.cdef('void read_path(char* path, struct ArrowArray* array, struct ArrowSchem clib: Any | None = None +ZERO_OFFSET = numpy.zeros(2) + path_cap_map = { 0: Path.Cap.Flush, @@ -263,6 +265,29 @@ def read_arrow( ) return elem + def get_boundary_batches(libarr: pyarrow.Array) -> dict[str, Any]: + batches = libarr['cells'].values.field('boundary_batches') + return dict( + offsets = batches.offsets.to_numpy(), + layer_inds = batches.values.field('layer').to_numpy(), + vert_arr = batches.values.field('vertices').values.to_numpy().reshape((-1, 2)), + vert_off = batches.values.field('vertices').offsets.to_numpy() // 2, + poly_off = batches.values.field('vertex_offsets').offsets.to_numpy(), + poly_offsets = batches.values.field('vertex_offsets').values.to_numpy(), + ) + + def get_boundary_props(libarr: pyarrow.Array) -> dict[str, Any]: + boundaries = libarr['cells'].values.field('boundary_props') + return dict( + offsets = boundaries.offsets.to_numpy(), + layer_inds = boundaries.values.field('layer').to_numpy(), + vert_arr = boundaries.values.field('vertices').values.to_numpy().reshape((-1, 2)), + vert_off = boundaries.values.field('vertices').offsets.to_numpy() // 2, + prop_off = boundaries.values.field('properties').offsets.to_numpy(), + prop_key = boundaries.values.field('properties').values.field('key').to_numpy(), + prop_val = boundaries.values.field('properties').values.field('value').to_pylist(), + ) + rf = libarr['cells'].values.field('refs') refs = dict( offsets = rf.offsets.to_numpy(), @@ -292,10 +317,9 @@ def read_arrow( ) elements = dict( - boundaries = get_geom(libarr, 'boundaries'), + boundary_batches = get_boundary_batches(libarr), + boundary_props = get_boundary_props(libarr), paths = get_geom(libarr, 'paths'), - boxes = get_geom(libarr, 'boxes'), - nodes = get_geom(libarr, 'nodes'), texts = texts, refs = refs, ) @@ -320,7 +344,8 @@ def read_arrow( for cc in range(len(libarr['cells'])): name = cell_names[cell_ids[cc]] pat = Pattern() - _boundaries_to_polygons(pat, global_args, elements['boundaries'], cc) + _boundary_batches_to_polygons(pat, global_args, elements['boundary_batches'], cc) + _boundary_props_to_polygons(pat, global_args, elements['boundary_props'], cc) _gpaths_to_mpaths(pat, global_args, elements['paths'], cc) _grefs_to_mrefs(pat, global_args, elements['refs'], cc) _texts_to_labels(pat, global_args, elements['texts'], cc) @@ -366,6 +391,7 @@ def _grefs_to_mrefs( elem_rep_xy1 = elem['rep_xy1'][elem_slc][:elem_count] elem_rep_counts = elem['rep_counts'][elem_slc][:elem_count] rep_valid = elem['rep_valid'][elem_slc][:elem_count] + raw_mode = global_args['raw_mode'] for ee in range(elem_count): @@ -380,10 +406,16 @@ def _grefs_to_mrefs( a_vector = elem_rep_xy0[ee] b_vector = elem_rep_xy1[ee] a_count, b_count = elem_rep_counts[ee] - rep = Grid(a_vector=a_vector, b_vector=b_vector, a_count=a_count, b_count=b_count) + if raw_mode: + rep = Grid._from_raw(a_vector=a_vector, b_vector=b_vector, a_count=a_count, b_count=b_count) + else: + rep = Grid(a_vector=a_vector, b_vector=b_vector, a_count=a_count, b_count=b_count) annotations = _read_annotations(prop_offs, prop_key, prop_val, ee) - ref = Ref(offset=offset, mirrored=mirr, rotation=rot, scale=mag, repetition=rep, annotations=annotations) + if raw_mode: + ref = Ref._from_raw(offset=offset, mirrored=mirr, rotation=rot, scale=mag, repetition=rep, annotations=annotations) + else: + ref = Ref(offset=offset, mirrored=mirr, rotation=rot, scale=mag, repetition=rep, annotations=annotations) pat.refs[target].append(ref) @@ -406,6 +438,7 @@ def _texts_to_labels( elem_xy = xy[elem_slc][:elem_count] elem_layer_inds = layer_inds[elem_slc][:elem_count] elem_strings = elem['string'][elem_slc][:elem_count] + raw_mode = global_args['raw_mode'] for ee in range(elem_count): layer = layer_tups[elem_layer_inds[ee]] @@ -413,7 +446,10 @@ def _texts_to_labels( string = elem_strings[ee] annotations = _read_annotations(prop_offs, prop_key, prop_val, ee) - mlabel = Label(string=string, offset=offset, annotations=annotations) + if raw_mode: + mlabel = Label._from_raw(string=string, offset=offset, annotations=annotations) + else: + mlabel = Label(string=string, offset=offset, annotations=annotations) pat.labels[layer].append(mlabel) @@ -439,7 +475,6 @@ def _gpaths_to_mpaths( elem_path_types = elem['path_type'][elem_slc][:elem_count] elem_extensions = elem['extensions'][elem_slc][:elem_count] - zeros = numpy.zeros((elem_count, 2)) raw_mode = global_args['raw_mode'] for ee in range(elem_count): layer = layer_tups[elem_layer_inds[ee]] @@ -453,65 +488,78 @@ def _gpaths_to_mpaths( cap_extensions = None annotations = _read_annotations(prop_offs, prop_key, prop_val, ee) - path = Path(vertices=vertices, offset=zeros[ee], annotations=annotations, raw=raw_mode, + path = Path(vertices=vertices, offset=ZERO_OFFSET, annotations=annotations, raw=raw_mode, width=width, cap=cap,cap_extensions=cap_extensions) pat.shapes[layer].append(path) -def _boundaries_to_polygons( +def _boundary_batches_to_polygons( pat: Pattern, global_args: dict[str, Any], elem: dict[str, Any], cc: int, ) -> None: elem_off = elem['offsets'] # which elements belong to each cell - xy_val = elem['xy_arr'] + vert_arr = elem['vert_arr'] + vert_off = elem['vert_off'] + layer_inds = elem['layer_inds'] + layer_tups = global_args['layer_tups'] + poly_off = elem['poly_off'] + poly_offsets = elem['poly_offsets'] + + batch_count = elem_off[cc + 1] - elem_off[cc] + if batch_count == 0: + return + + elem_slc = slice(elem_off[cc], elem_off[cc] + batch_count + 1) # +1 to capture ending location for last elem + elem_vert_off = vert_off[elem_slc] + elem_poly_off = poly_off[elem_slc] + elem_layer_inds = layer_inds[elem_slc][:batch_count] + + raw_mode = global_args['raw_mode'] + for bb in range(batch_count): + layer = layer_tups[elem_layer_inds[bb]] + vertices = vert_arr[elem_vert_off[bb]:elem_vert_off[bb + 1]] + vertex_offsets = numpy.asarray(poly_offsets[elem_poly_off[bb]:elem_poly_off[bb + 1]], dtype=numpy.intp) + + if vertex_offsets.size == 1: + poly = Polygon(vertices=vertices, offset=ZERO_OFFSET, annotations=None, raw=raw_mode) + pat.shapes[layer].append(poly) + else: + polys = PolyCollection(vertex_lists=vertices, vertex_offsets=vertex_offsets, offset=ZERO_OFFSET, annotations=None, raw=raw_mode) + pat.shapes[layer].append(polys) + + +def _boundary_props_to_polygons( + pat: Pattern, + global_args: dict[str, Any], + elem: dict[str, Any], + cc: int, + ) -> None: + elem_off = elem['offsets'] + vert_arr = elem['vert_arr'] + vert_off = elem['vert_off'] layer_inds = elem['layer_inds'] layer_tups = global_args['layer_tups'] prop_key = elem['prop_key'] prop_val = elem['prop_val'] elem_count = elem_off[cc + 1] - elem_off[cc] - elem_slc = slice(elem_off[cc], elem_off[cc] + elem_count + 1) # +1 to capture ending location for last elem - xy_offs = elem['xy_off'][elem_slc] # which xy coords belong to each element - xy_counts = xy_offs[1:] - xy_offs[:-1] - prop_offs = elem['prop_off'][elem_slc] # which props belong to each element - prop_counts = prop_offs[1:] - prop_offs[:-1] + if elem_count == 0: + return + + elem_slc = slice(elem_off[cc], elem_off[cc] + elem_count + 1) + elem_vert_off = vert_off[elem_slc] + prop_offs = elem['prop_off'][elem_slc] elem_layer_inds = layer_inds[elem_slc][:elem_count] - order = numpy.argsort(elem_layer_inds, stable=True) - unilayer_inds, unilayer_first, unilayer_count = numpy.unique(elem_layer_inds, return_index=True, return_counts=True) - - zeros = numpy.zeros((elem_count, 2)) raw_mode = global_args['raw_mode'] - for layer_ind, ff, nn in zip(unilayer_inds, unilayer_first, unilayer_count, strict=True): - ee_inds = order[ff:ff + nn] - layer = layer_tups[layer_ind] - propless_mask = prop_counts[ee_inds] == 0 - - poly_count_on_layer = propless_mask.sum() - if poly_count_on_layer == 1: - propless_mask[:] = 0 # Never make a 1-element collection - elif poly_count_on_layer > 1: - propless_vert_counts = xy_counts[ee_inds[propless_mask]] - 1 # -1 to drop closing point - vertex_lists = numpy.empty((propless_vert_counts.sum(), 2), dtype=numpy.float64) - vertex_offsets = numpy.cumsum(numpy.concatenate([[0], propless_vert_counts])) - - for ii, ee in enumerate(ee_inds[propless_mask]): - vo = vertex_offsets[ii] - vertex_lists[vo:vo + propless_vert_counts[ii]] = xy_val[xy_offs[ee]:xy_offs[ee + 1] - 1] - - polys = PolyCollection(vertex_lists=vertex_lists, vertex_offsets=vertex_offsets, offset=zeros[ee]) - pat.shapes[layer].append(polys) - - # Handle single polygons - for ee in ee_inds[~propless_mask]: - layer = layer_tups[elem_layer_inds[ee]] - vertices = xy_val[xy_offs[ee]:xy_offs[ee + 1] - 1] # -1 to drop closing point - - annotations = _read_annotations(prop_offs, prop_key, prop_val, ee) - poly = Polygon(vertices=vertices, offset=zeros[ee], annotations=annotations, raw=raw_mode) - pat.shapes[layer].append(poly) + for ee in range(elem_count): + layer = layer_tups[elem_layer_inds[ee]] + vertices = vert_arr[elem_vert_off[ee]:elem_vert_off[ee + 1]] + annotations = _read_annotations(prop_offs, prop_key, prop_val, ee) + poly = Polygon(vertices=vertices, offset=ZERO_OFFSET, annotations=annotations, raw=raw_mode) + pat.shapes[layer].append(poly) #def _properties_to_annotations(properties: pyarrow.Array) -> annotations_t: diff --git a/masque/test/test_gdsii_arrow.py b/masque/test/test_gdsii_arrow.py index 26713e9..b005cd7 100644 --- a/masque/test/test_gdsii_arrow.py +++ b/masque/test/test_gdsii_arrow.py @@ -5,6 +5,7 @@ import pytest pytest.importorskip('pyarrow') +from .. import Ref, Label from ..library import Library from ..pattern import Pattern from ..repetition import Grid @@ -119,7 +120,10 @@ def _make_arrow_test_library() -> Library: leaf = Pattern() leaf.polygon((1, 0), vertices=[[0, 0], [10, 0], [10, 10], [0, 10]], annotations={'1': ['leaf-poly']}) + leaf.polygon((2, 0), vertices=[[40, 0], [50, 0], [50, 10], [40, 10]]) leaf.polygon((1, 0), vertices=[[20, 0], [30, 0], [30, 10], [20, 10]]) + leaf.polygon((1, 0), vertices=[[80, 0], [90, 0], [90, 10], [80, 10]]) + leaf.polygon((2, 0), vertices=[[60, 0], [70, 0], [70, 10], [60, 10]], annotations={'18': ['leaf-poly-2']}) leaf.label((10, 0), string='LEAF', offset=(3, 4), annotations={'10': ['leaf-label']}) lib['leaf'] = leaf @@ -178,3 +182,80 @@ def test_gdsii_arrow_reads_small_perf_fixture(tmp_path: Path) -> None: assert len(lib) == manifest.cells assert 'TOP' in lib assert sum(len(refs) for refs in lib['TOP'].refs.values()) > 0 + + +def test_gdsii_arrow_boundary_batch_schema(tmp_path: Path) -> None: + lib = _make_arrow_test_library() + gds_file = tmp_path / 'arrow_batches.gds' + gdsii.writefile(lib, gds_file, meters_per_unit=1e-9) + + libarr = gdsii_arrow._read_to_arrow(gds_file)[0] + cells = libarr['cells'].values + cell_ids = cells.field('id').to_numpy() + cell_names = libarr['cell_names'].as_py() + layer_table = [ + ((int(layer) >> 16) & 0xFFFF, int(layer) & 0xFFFF) + for layer in libarr['layers'].values.to_numpy() + ] + + leaf_index = next(ii for ii, cell_id in enumerate(cell_ids) if cell_names[cell_id] == 'leaf') + + boundary_batches = cells.field('boundary_batches')[leaf_index].as_py() + boundary_props = cells.field('boundary_props')[leaf_index].as_py() + + assert len(boundary_batches) == 2 + assert len(boundary_props) == 2 + + batch_by_layer = {tuple(layer_table[entry['layer']]): entry for entry in boundary_batches} + assert batch_by_layer[(1, 0)]['vertex_offsets'] == [0, 4] + assert len(batch_by_layer[(1, 0)]['vertices']) == 16 + assert batch_by_layer[(2, 0)]['vertex_offsets'] == [0] + assert len(batch_by_layer[(2, 0)]['vertices']) == 8 + + props_by_layer = {tuple(layer_table[entry['layer']]): entry for entry in boundary_props} + assert sorted(props_by_layer) == [(1, 0), (2, 0)] + assert props_by_layer[(1, 0)]['properties'][0]['value'] == 'leaf-poly' + assert props_by_layer[(2, 0)]['properties'][0]['value'] == 'leaf-poly-2' + + +def test_raw_ref_grid_label_constructors_match_public() -> None: + raw_grid = Grid._from_raw( + a_vector=numpy.array([20, 0]), + a_count=3, + b_vector=numpy.array([0, 30]), + b_count=2, + ) + public_grid = Grid(a_vector=(20, 0), a_count=3, b_vector=(0, 30), b_count=2) + assert raw_grid == public_grid + + raw_ref = Ref._from_raw( + offset=numpy.array([100, 200]), + rotation=numpy.pi / 2, + mirrored=True, + scale=1.25, + repetition=raw_grid, + annotations={'12': ['child-ref']}, + ) + public_ref = Ref( + offset=(100, 200), + rotation=numpy.pi / 2, + mirrored=True, + scale=1.25, + repetition=public_grid, + annotations={'12': ['child-ref']}, + ) + assert raw_ref == public_ref + assert numpy.array_equal(raw_ref.as_transforms(), public_ref.as_transforms()) + + raw_label = Label._from_raw( + 'LEAF', + offset=numpy.array([3, 4]), + annotations={'10': ['leaf-label']}, + ) + public_label = Label( + 'LEAF', + offset=(3, 4), + annotations={'10': ['leaf-label']}, + ) + assert raw_label == public_label + assert numpy.array_equal(raw_label.get_bounds_single(), public_label.get_bounds_single())