[gdsii_arrow] further improvements to speed
This commit is contained in:
parent
d387066228
commit
28562f73f6
3 changed files with 235 additions and 61 deletions
|
|
@ -5,17 +5,18 @@ import importlib
|
|||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from masque import LibraryError
|
||||
|
||||
|
||||
READERS = {
|
||||
'gdsii': ('masque.file.gdsii', 'readfile'),
|
||||
'gdsii_arrow': ('masque.file.gdsii_arrow', 'readfile'),
|
||||
READERS: dict[str, tuple[str, tuple[str, ...]]] = {
|
||||
'gdsii': ('masque.file.gdsii', ('readfile',)),
|
||||
'gdsii_arrow': ('masque.file.gdsii_arrow', ('readfile', 'arrow_import', 'arrow_convert')),
|
||||
}
|
||||
|
||||
|
||||
def _summarize(path: Path, elapsed_s: float, info: dict[str, object], lib: object) -> dict[str, object]:
|
||||
def _summarize_library(path: Path, elapsed_s: float, info: dict[str, object], lib: object) -> dict[str, object]:
|
||||
assert hasattr(lib, '__len__')
|
||||
assert hasattr(lib, 'tops')
|
||||
tops = lib.tops() # type: ignore[no-any-return, attr-defined]
|
||||
|
|
@ -34,12 +35,50 @@ def _summarize(path: Path, elapsed_s: float, info: dict[str, object], lib: objec
|
|||
}
|
||||
|
||||
|
||||
def _summarize_arrow_import(path: Path, elapsed_s: float, arrow_arr: Any) -> dict[str, object]:
|
||||
libarr = arrow_arr[0]
|
||||
return {
|
||||
'path': str(path),
|
||||
'elapsed_s': elapsed_s,
|
||||
'arrow_rows': len(arrow_arr),
|
||||
'library_name': libarr['lib_name'].as_py(),
|
||||
'cell_count': len(libarr['cells']),
|
||||
'layer_count': len(libarr['layers']),
|
||||
}
|
||||
|
||||
|
||||
def _profile_stage(module: Any, stage: str, path: Path) -> dict[str, object]:
|
||||
start = time.perf_counter()
|
||||
|
||||
if stage == 'readfile':
|
||||
lib, info = module.readfile(path)
|
||||
elapsed_s = time.perf_counter() - start
|
||||
return _summarize_library(path, elapsed_s, info, lib)
|
||||
|
||||
if stage == 'arrow_import':
|
||||
arrow_arr = module._read_to_arrow(path)
|
||||
elapsed_s = time.perf_counter() - start
|
||||
return _summarize_arrow_import(path, elapsed_s, arrow_arr)
|
||||
|
||||
if stage == 'arrow_convert':
|
||||
arrow_arr = module._read_to_arrow(path)
|
||||
libarr = arrow_arr[0]
|
||||
start = time.perf_counter()
|
||||
lib, info = module.read_arrow(libarr)
|
||||
elapsed_s = time.perf_counter() - start
|
||||
return _summarize_library(path, elapsed_s, info, lib)
|
||||
|
||||
raise ValueError(f'Unsupported stage {stage!r}')
|
||||
|
||||
|
||||
def build_arg_parser() -> argparse.ArgumentParser:
    """
    Create the command-line parser for the profiling script.

    Options:
        --reader       required; one of the keys of `READERS`.
        --stage        stage name, defaults to `'readfile'`. It is not
                       restricted by the parser itself.
        --path         required; parsed as a `Path`.
        --warmup       int, default 1.
        --repeat       int, default 1.
        --output-json  optional `Path`.

    Returns:
        The configured parser.
    """
    # (flag, add_argument keyword arguments), in the order they are registered.
    option_table: tuple[tuple[str, dict[str, Any]], ...] = (
        ('--reader', {'choices': sorted(READERS), 'required': True}),
        ('--stage', {'default': 'readfile'}),
        ('--path', {'type': Path, 'required': True}),
        ('--warmup', {'type': int, 'default': 1}),
        ('--repeat', {'type': int, 'default': 1}),
        ('--output-json', {'type': Path}),
    )

    parser = argparse.ArgumentParser(description='Profile GDS readers with a stable end-to-end workload.')
    for flag, options in option_table:
        parser.add_argument(flag, **options)
    return parser
|
||||
|
||||
|
||||
|
|
@ -47,26 +86,32 @@ def main(argv: list[str] | None = None) -> int:
|
|||
parser = build_arg_parser()
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
module_name, attr_name = READERS[args.reader]
|
||||
readfile = getattr(importlib.import_module(module_name), attr_name)
|
||||
module_name, stages = READERS[args.reader]
|
||||
if args.stage not in stages:
|
||||
parser.error(f'reader {args.reader!r} only supports stages: {", ".join(stages)}')
|
||||
|
||||
module = importlib.import_module(module_name)
|
||||
path = args.path.expanduser().resolve()
|
||||
|
||||
for _ in range(args.warmup):
|
||||
readfile(path)
|
||||
_profile_stage(module, args.stage, path)
|
||||
|
||||
runs = []
|
||||
for _ in range(args.repeat):
|
||||
start = time.perf_counter()
|
||||
lib, info = readfile(path)
|
||||
elapsed_s = time.perf_counter() - start
|
||||
runs.append(_summarize(path, elapsed_s, info, lib))
|
||||
runs.append(_profile_stage(module, args.stage, path))
|
||||
|
||||
print(json.dumps({
|
||||
payload = {
|
||||
'reader': args.reader,
|
||||
'stage': args.stage,
|
||||
'warmup': args.warmup,
|
||||
'repeat': args.repeat,
|
||||
'runs': runs,
|
||||
}, indent=2, sort_keys=True))
|
||||
}
|
||||
rendered = json.dumps(payload, indent=2, sort_keys=True)
|
||||
if args.output_json is not None:
|
||||
args.output_json.parent.mkdir(parents=True, exist_ok=True)
|
||||
args.output_json.write_text(rendered + '\n')
|
||||
print(rendered)
|
||||
return 0
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue