From 2130b015fd15100fbf2c4ed70952ea49a6d096fd Mon Sep 17 00:00:00 2001 From: jan Date: Fri, 3 Jul 2020 13:46:38 -0700 Subject: [PATCH 01/12] readme updates --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 896001e..afca8ac 100644 --- a/README.md +++ b/README.md @@ -6,10 +6,10 @@ electromagnetic solver implemented in Python and OpenCL. **Capabilities:** * Arbitrary distributions of the following: - * Dielectric constant (```epsilon```) - * Magnetic permeabilty (```mu```) - * Perfect electric conductor (```PEC```) - * Perfect magnetic conductor (```PMC```) + * Dielectric constant (`epsilon`) + * Magnetic permeabilty (`mu`) + * Perfect electric conductor (`PEC`) + * Perfect magnetic conductor (`PMC`) * Variable-sized rectangular grids * Stretched-coordinate PMLs (complex cell sizes allowed) @@ -17,10 +17,10 @@ Currently, only periodic boundary conditions are included. PEC/PMC boundaries can be implemented by drawing PEC/PMC cells near the edges. Bloch boundary conditions are not included but wouldn't be very hard to add. -The default solver ```opencl_fdfd.cg_solver(...)``` located in main.py +The default solver `opencl_fdfd.cg_solver(...)` located in main.py implements the E-field wave operator directly (ie, as a list of OpenCL instructions rather than a matrix). Additionally, there is a slower -(and slightly more versatile) solver in ```csr.py``` which attempts to solve +(and slightly more versatile) solver in `csr.py` which attempts to solve an arbitrary sparse matrix in compressed sparse row (CSR) format using the same conjugate gradient method as the default solver. The CSR solver is significantly slower, but can be very useful for testing alternative @@ -49,14 +49,14 @@ pip install git+https://mpxd.net/code/jan/opencl_fdfd.git@release ## Use -See the documentation for ```opencl_fdfd.cg_solver(...)``` +See the documentation for `opencl_fdfd.cg_solver(...)` (located in ```main.py```) for details about how to call the solver. The FDFD arguments are identical to those in -```fdfd_tools.solvers.generic(...)```, and a few solver-specific +`meanas.solvers.generic(...)`, and a few solver-specific arguments are available. An alternate (slower) FDFD solver and a general gpu-based sparse matrix -solver is available in ```csr.py```. These aren't particularly +solver is available in `csr.py`. These aren't particularly well-optimized, and something like [MAGMA](http://icl.cs.utk.edu/magma/index.html) would probably be a better choice if you absolutely need to solve arbitrary sparse matrices From 66c30e6eab9b862f8803e6f6ec182f709b4e0cae Mon Sep 17 00:00:00 2001 From: jan Date: Fri, 3 Jul 2020 13:48:24 -0700 Subject: [PATCH 02/12] move from fdfd_tools to meanas --- README.md | 4 ++-- opencl_fdfd/__init__.py | 2 +- opencl_fdfd/main.py | 8 ++++---- setup.py | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index afca8ac..56a5f76 100644 --- a/README.md +++ b/README.md @@ -34,11 +34,11 @@ generalization to multiple GPUs should be pretty straightforward ## Installation **Dependencies:** -* python 3 (written and tested with 3.5) +* python 3 (written and tested with 3.7) * numpy * pyopencl * jinja2 -* [fdfd_tools](https://mpxd.net/code/jan/fdfd_tools) (>=0.2) +* [meanas](https://mpxd.net/code/jan/meanas) (>=0.5) Install with pip, via git: diff --git a/opencl_fdfd/__init__.py b/opencl_fdfd/__init__.py index a1d54c6..927f5e2 100644 --- a/opencl_fdfd/__init__.py +++ b/opencl_fdfd/__init__.py @@ -31,7 +31,7 @@ Dependencies: - - fdfd_tools ( https://mpxd.net/code/jan/fdfd_tools ) + - meanas ( https://mpxd.net/code/jan/meanas ) - numpy - pyopencl - jinja2 diff --git a/opencl_fdfd/main.py b/opencl_fdfd/main.py index a48aec0..95cd37a 100644 --- a/opencl_fdfd/main.py +++ b/opencl_fdfd/main.py @@ -15,7 +15,7 @@ from numpy.linalg import norm import pyopencl import pyopencl.array -import fdfd_tools.operators +import meanas.fdfd.operators from . import ops @@ -43,7 +43,7 @@ def cg_solver(omega: complex, OpenCL. All ndarray arguments should be 1D arrays. To linearize a list of 3 3D ndarrays, - either use fdfd_tools.vec() or numpy: + either use meanas.vec() or numpy: f_1D = numpy.hstack(tuple((fi.flatten(order='F') for fi in [f_x, f_y, f_z]))) :param omega: Complex frequency to solve at. @@ -104,7 +104,7 @@ def cg_solver(omega: complex, if mu is not None: mu = numpy.conj(mu) - L, R = fdfd_tools.operators.e_full_preconditioners(dxes) + L, R = meanas.fdfd.operators.e_full_preconditioners(dxes) if adjoint: b_preconditioned = R @ b @@ -221,7 +221,7 @@ def cg_solver(omega: complex, logger.debug('final error {}'.format(errs[-1])) logger.debug('overhead {} sec'.format(start_time2 - start_time)) - A0 = fdfd_tools.operators.e_full(omega, dxes, epsilon, mu).tocsr() + A0 = meanas.fdfd.operators.e_full(omega, dxes, epsilon, mu).tocsr() if adjoint: # Remember we conjugated all the contents of A earlier A0 = A0.T diff --git a/setup.py b/setup.py index 1cbf8f0..b037fd1 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ setup(name='opencl_fdfd', 'numpy', 'pyopencl', 'jinja2', - 'fdfd_tools>=0.3', + 'meanas>=0.5', ], extras_require={ }, From 792b161753f75bface5326596dc7d3890b18ed27 Mon Sep 17 00:00:00 2001 From: jan Date: Fri, 3 Jul 2020 13:48:47 -0700 Subject: [PATCH 03/12] avoid importing the package before its installed... --- setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index b037fd1..0c18709 100644 --- a/setup.py +++ b/setup.py @@ -1,13 +1,12 @@ #!/usr/bin/env python3 from setuptools import setup, find_packages -import opencl_fdfd with open('README.md', 'r') as f: long_description = f.read() setup(name='opencl_fdfd', - version=opencl_fdfd.version, + version='0.3', description='OpenCL FDFD solver', long_description=long_description, long_description_content_type='text/markdown', From 5861767a00f9a6437f3225ab318b600f19aaa9cf Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Sun, 11 Jul 2021 17:07:46 -0700 Subject: [PATCH 04/12] depend on meanas instad of fdfd_tools --- opencl_fdfd/csr.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/opencl_fdfd/csr.py b/opencl_fdfd/csr.py index 9761fc0..a34f503 100644 --- a/opencl_fdfd/csr.py +++ b/opencl_fdfd/csr.py @@ -6,7 +6,7 @@ CSRMatrix sparse matrix representation. The FDFD solver (fdfd_cg_solver()) solves an FDFD problem by creating a sparse matrix representing the problem (using -fdfd_tools) and then passing it to cg(), which performs a +meanas) and then passing it to cg(), which performs a conjugate gradient solve. cg() is capable of solving arbitrary sparse matrices which @@ -23,7 +23,7 @@ from numpy.linalg import norm import pyopencl import pyopencl.array -import fdfd_tools.solvers +import meanas.fdfd.solvers from . import ops @@ -158,9 +158,9 @@ def fdfd_cg_solver(solver_opts: Dict[str, Any] = None, Conjugate gradient FDFD solver using CSR sparse matrices, mainly for testing and development since it's much slower than the solver in main.py. - Calls fdfd_tools.solvers.generic(**fdfd_args, - matrix_solver=opencl_fdfd.csr.cg, - matrix_solver_opts=solver_opts) + Calls meanas.fdfd.solvers.generic(**fdfd_args, + matrix_solver=opencl_fdfd.csr.cg, + matrix_solver_opts=solver_opts) :param solver_opts: Passed as matrix_solver_opts to fdfd_tools.solver.generic(...). Default {}. @@ -172,8 +172,8 @@ def fdfd_cg_solver(solver_opts: Dict[str, Any] = None, if solver_opts is None: solver_opts = dict() - x = fdfd_tools.solvers.generic(matrix_solver=cg, - matrix_solver_opts=solver_opts, - **fdfd_args) + x = meanas.fdfd.solvers.generic(matrix_solver=cg, + matrix_solver_opts=solver_opts, + **fdfd_args) return x From 0ebfa030c4e07c521d2f15898e07fbac469a4c5a Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Sun, 11 Jul 2021 17:07:53 -0700 Subject: [PATCH 05/12] README fixes --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 56a5f76..9347e21 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ See the documentation for `opencl_fdfd.cg_solver(...)` The FDFD arguments are identical to those in `meanas.solvers.generic(...)`, and a few solver-specific arguments are available. - + An alternate (slower) FDFD solver and a general gpu-based sparse matrix solver is available in `csr.py`. These aren't particularly well-optimized, and something like From 77f53affe76546735889139d2d1d5e41350204bc Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Sun, 11 Jul 2021 17:08:17 -0700 Subject: [PATCH 06/12] use VERSION.py instead of importing package before it's installed --- opencl_fdfd/VERSION.py | 4 ++++ opencl_fdfd/__init__.py | 3 ++- setup.py | 5 ++++- 3 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 opencl_fdfd/VERSION.py diff --git a/opencl_fdfd/VERSION.py b/opencl_fdfd/VERSION.py new file mode 100644 index 0000000..9fa1853 --- /dev/null +++ b/opencl_fdfd/VERSION.py @@ -0,0 +1,4 @@ +""" VERSION defintion. THIS FILE IS MANUALLY PARSED BY setup.py and REQUIRES A SPECIFIC FORMAT """ +__version__ = ''' +0.3 +'''.strip() diff --git a/opencl_fdfd/__init__.py b/opencl_fdfd/__init__.py index 927f5e2..311bdd0 100644 --- a/opencl_fdfd/__init__.py +++ b/opencl_fdfd/__init__.py @@ -41,4 +41,5 @@ from .main import cg_solver __author__ = 'Jan Petykiewicz' -version = '0.3' +from .VERSION import __version__ +version = __version__ diff --git a/setup.py b/setup.py index 0c18709..68dfeb8 100644 --- a/setup.py +++ b/setup.py @@ -5,8 +5,11 @@ from setuptools import setup, find_packages with open('README.md', 'r') as f: long_description = f.read() +with open('opencl_fdfd/VERSION.py', 'rt') as f: + version = f.readlines()[2].strip() + setup(name='opencl_fdfd', - version='0.3', + version=version, description='OpenCL FDFD solver', long_description=long_description, long_description_content_type='text/markdown', From cba31bf081e757a834326c2a642ee06d44cef06d Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Sun, 11 Jul 2021 17:08:21 -0700 Subject: [PATCH 07/12] use new email --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 68dfeb8..e758aa2 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ setup(name='opencl_fdfd', long_description=long_description, long_description_content_type='text/markdown', author='Jan Petykiewicz', - author_email='anewusername@gmail.com', + author_email='jan@mpxd.net', url='https://mpxd.net/code/jan/opencl_fdfd', packages=find_packages(), package_data={ From e41a71ce6f2b6432beba8152cadc4165eaef8347 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Sun, 20 Nov 2022 19:38:21 -0800 Subject: [PATCH 08/12] move to hatch-based build --- opencl_fdfd/LICENSE.md | 1 + opencl_fdfd/README.md | 1 + opencl_fdfd/VERSION.py | 4 ---- opencl_fdfd/__init__.py | 3 +-- opencl_fdfd/main.py | 2 +- opencl_fdfd/py.typed | 0 pyproject.toml | 48 +++++++++++++++++++++++++++++++++++++++++ setup.py | 32 --------------------------- 8 files changed, 52 insertions(+), 39 deletions(-) create mode 120000 opencl_fdfd/LICENSE.md create mode 120000 opencl_fdfd/README.md delete mode 100644 opencl_fdfd/VERSION.py create mode 100644 opencl_fdfd/py.typed create mode 100644 pyproject.toml delete mode 100644 setup.py diff --git a/opencl_fdfd/LICENSE.md b/opencl_fdfd/LICENSE.md new file mode 120000 index 0000000..7eabdb1 --- /dev/null +++ b/opencl_fdfd/LICENSE.md @@ -0,0 +1 @@ +../LICENSE.md \ No newline at end of file diff --git a/opencl_fdfd/README.md b/opencl_fdfd/README.md new file mode 120000 index 0000000..32d46ee --- /dev/null +++ b/opencl_fdfd/README.md @@ -0,0 +1 @@ +../README.md \ No newline at end of file diff --git a/opencl_fdfd/VERSION.py b/opencl_fdfd/VERSION.py deleted file mode 100644 index 9fa1853..0000000 --- a/opencl_fdfd/VERSION.py +++ /dev/null @@ -1,4 +0,0 @@ -""" VERSION defintion. THIS FILE IS MANUALLY PARSED BY setup.py and REQUIRES A SPECIFIC FORMAT """ -__version__ = ''' -0.3 -'''.strip() diff --git a/opencl_fdfd/__init__.py b/opencl_fdfd/__init__.py index 311bdd0..f5a639b 100644 --- a/opencl_fdfd/__init__.py +++ b/opencl_fdfd/__init__.py @@ -40,6 +40,5 @@ from .main import cg_solver __author__ = 'Jan Petykiewicz' - -from .VERSION import __version__ +__version__ = '0.3' version = __version__ diff --git a/opencl_fdfd/main.py b/opencl_fdfd/main.py index 95cd37a..3de469b 100644 --- a/opencl_fdfd/main.py +++ b/opencl_fdfd/main.py @@ -43,7 +43,7 @@ def cg_solver(omega: complex, OpenCL. All ndarray arguments should be 1D arrays. To linearize a list of 3 3D ndarrays, - either use meanas.vec() or numpy: + either use meanas.fdmath.vec() or numpy: f_1D = numpy.hstack(tuple((fi.flatten(order='F') for fi in [f_x, f_y, f_z]))) :param omega: Complex frequency to solve at. diff --git a/opencl_fdfd/py.typed b/opencl_fdfd/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a9430f5 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,48 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "opencl_fdfd" +description = "OpenCL FDFD solver" +readme = "README.md" +license = { file = "LICENSE.md" } +authors = [ + { name="Jan Petykiewicz", email="jan@mpxd.net" }, + ] +homepage = "https://mpxd.net/code/jan/opencl_fdfd" +repository = "https://mpxd.net/code/jan/opencl_fdfd" +keywords = [ + "FDFD", + "finite", + "difference", + "frequency", + "domain", + "simulation", + "optics", + "electromagnetic", + "dielectric", + "PML", + "solver", + "FDTD", + ] +classifiers = [ + "Programming Language :: Python :: 3", + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Manufacturing", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: GNU Affero General Public License v3", + "Topic :: Scientific/Engineering", + ] +requires-python = ">=3.8" +dynamic = ["version"] +dependencies = [ + "numpy~=1.21", + "pyopencl", + "jinja2", + "meanas>=0.5", + ] + +[tool.hatch.version] +path = "opencl_fdfd/__init__.py" diff --git a/setup.py b/setup.py deleted file mode 100644 index e758aa2..0000000 --- a/setup.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python3 - -from setuptools import setup, find_packages - -with open('README.md', 'r') as f: - long_description = f.read() - -with open('opencl_fdfd/VERSION.py', 'rt') as f: - version = f.readlines()[2].strip() - -setup(name='opencl_fdfd', - version=version, - description='OpenCL FDFD solver', - long_description=long_description, - long_description_content_type='text/markdown', - author='Jan Petykiewicz', - author_email='jan@mpxd.net', - url='https://mpxd.net/code/jan/opencl_fdfd', - packages=find_packages(), - package_data={ - 'opencl_fdfd': ['kernels/*'] - }, - install_requires=[ - 'numpy', - 'pyopencl', - 'jinja2', - 'meanas>=0.5', - ], - extras_require={ - }, - ) - From 81bb1dd2c0d54c84135ffe7e40742148ddf5cb29 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Sun, 20 Nov 2022 19:38:34 -0800 Subject: [PATCH 09/12] add caches to .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 2ec014d..19e276a 100644 --- a/.gitignore +++ b/.gitignore @@ -60,3 +60,6 @@ target/ # PyCharm .idea/ + +.mypy_cache/ +.pytest_cache/ From efeb29479b30335adbd44820ff8343b008ca3ea5 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Sun, 20 Nov 2022 21:57:43 -0800 Subject: [PATCH 10/12] improve type annotations, formatting, comment styles --- opencl_fdfd/csr.py | 88 ++++++++------- opencl_fdfd/main.py | 78 +++++++------- opencl_fdfd/ops.py | 257 ++++++++++++++++++++++++++++---------------- 3 files changed, 261 insertions(+), 162 deletions(-) diff --git a/opencl_fdfd/csr.py b/opencl_fdfd/csr.py index a34f503..df95822 100644 --- a/opencl_fdfd/csr.py +++ b/opencl_fdfd/csr.py @@ -14,14 +14,16 @@ satisfy the constraints for the 'conjugate gradient' algorithm (positive definite, symmetric) and some that don't. """ -from typing import Dict, Any +from typing import Dict, Any, Optional import time import logging import numpy +from numpy.typing import NDArray, ArrayLike from numpy.linalg import norm import pyopencl import pyopencl.array +import scipy import meanas.fdfd.solvers @@ -33,39 +35,45 @@ __author__ = 'Jan Petykiewicz' logger = logging.getLogger(__name__) -class CSRMatrix(object): +class CSRMatrix: """ Matrix stored in Compressed Sparse Row format, in GPU RAM. """ - row_ptr = None # type: pyopencl.array.Array - col_ind = None # type: pyopencl.array.Array - data = None # type: pyopencl.array.Array + row_ptr: pyopencl.array.Array + col_ind: pyopencl.array.Array + data: pyopencl.array.Array - def __init__(self, - queue: pyopencl.CommandQueue, - m: 'scipy.sparse.csr_matrix'): + def __init__( + self, + queue: pyopencl.CommandQueue, + m: 'scipy.sparse.csr_matrix', + ) -> None: self.row_ptr = pyopencl.array.to_device(queue, m.indptr) self.col_ind = pyopencl.array.to_device(queue, m.indices) self.data = pyopencl.array.to_device(queue, m.data.astype(numpy.complex128)) -def cg(A: 'scipy.sparse.csr_matrix', - b: numpy.ndarray, - max_iters: int = 10000, - err_threshold: float = 1e-6, - context: pyopencl.Context = None, - queue: pyopencl.CommandQueue = None, - ) -> numpy.ndarray: +def cg( + A: 'scipy.sparse.csr_matrix', + b: ArrayLike, + max_iters: int = 10000, + err_threshold: float = 1e-6, + context: Optional[pyopencl.Context] = None, + queue: Optional[pyopencl.CommandQueue] = None, + ) -> NDArray: """ General conjugate-gradient solver for sparse matrices, where A @ x = b. - :param A: Matrix to solve (CSR format) - :param b: Right-hand side vector (dense ndarray) - :param max_iters: Maximum number of iterations - :param err_threshold: Error threshold for successful solve, relative to norm(b) - :param context: PyOpenCL context. Will be created if not given. - :param queue: PyOpenCL command queue. Will be created if not given. - :return: Solution vector x; returned even if solve doesn't converge. + Args: + A: Matrix to solve (CSR format) + b: Right-hand side vector (dense ndarray) + max_iters: Maximum number of iterations + err_threshold: Error threshold for successful solve, relative to norm(b) + context: PyOpenCL context. Will be created if not given. + queue: PyOpenCL command queue. Will be created if not given. + + Returns: + Solution vector x; returned even if solve doesn't converge. """ start_time = time.perf_counter() @@ -151,29 +159,37 @@ def cg(A: 'scipy.sparse.csr_matrix', return x -def fdfd_cg_solver(solver_opts: Dict[str, Any] = None, - **fdfd_args - ) -> numpy.ndarray: +def fdfd_cg_solver( + solver_opts: Optional[Dict[str, Any]] = None, + **fdfd_args + ) -> NDArray: """ Conjugate gradient FDFD solver using CSR sparse matrices, mainly for testing and development since it's much slower than the solver in main.py. - Calls meanas.fdfd.solvers.generic(**fdfd_args, - matrix_solver=opencl_fdfd.csr.cg, - matrix_solver_opts=solver_opts) + Calls meanas.fdfd.solvers.generic( + **fdfd_args, + matrix_solver=opencl_fdfd.csr.cg, + matrix_solver_opts=solver_opts, + ) - :param solver_opts: Passed as matrix_solver_opts to fdfd_tools.solver.generic(...). - Default {}. - :param fdfd_args: Passed as **fdfd_args to fdfd_tools.solver.generic(...). - Should include all of the arguments **except** matrix_solver and matrix_solver_opts - :return: E-field which solves the system. + Args: + solver_opts: Passed as matrix_solver_opts to fdfd_tools.solver.generic(...). + Default {}. + fdfd_args: Passed as **fdfd_args to fdfd_tools.solver.generic(...). + Should include all of the arguments **except** matrix_solver and matrix_solver_opts + + Returns: + E-field which solves the system. """ if solver_opts is None: solver_opts = dict() - x = meanas.fdfd.solvers.generic(matrix_solver=cg, - matrix_solver_opts=solver_opts, - **fdfd_args) + x = meanas.fdfd.solvers.generic( + matrix_solver=cg, + matrix_solver_opts=solver_opts, + **fdfd_args, + ) return x diff --git a/opencl_fdfd/main.py b/opencl_fdfd/main.py index 3de469b..d6e0d83 100644 --- a/opencl_fdfd/main.py +++ b/opencl_fdfd/main.py @@ -6,11 +6,12 @@ operator implemented directly as OpenCL arithmetic (rather than as a matrix). """ -from typing import List +from typing import List, Optional, cast import time import logging import numpy +from numpy.typing import NDArray, ArrayLike from numpy.linalg import norm import pyopencl import pyopencl.array @@ -25,18 +26,19 @@ __author__ = 'Jan Petykiewicz' logger = logging.getLogger(__name__) -def cg_solver(omega: complex, - dxes: List[List[numpy.ndarray]], - J: numpy.ndarray, - epsilon: numpy.ndarray, - mu: numpy.ndarray = None, - pec: numpy.ndarray = None, - pmc: numpy.ndarray = None, - adjoint: bool = False, - max_iters: int = 40000, - err_threshold: float = 1e-6, - context: pyopencl.Context = None, - ) -> numpy.ndarray: +def cg_solver( + omega: complex, + dxes: List[List[NDArray]], + J: ArrayLike, + epsilon: ArrayLike, + mu: Optional[ArrayLike] = None, + pec: Optional[ArrayLike] = None, + pmc: Optional[ArrayLike] = None, + adjoint: bool = False, + max_iters: int = 40000, + err_threshold: float = 1e-6, + context: Optional[pyopencl.Context] = None, + ) -> NDArray: """ OpenCL FDFD solver using the iterative conjugate gradient (cg) method and implementing the diagonalized E-field wave operator directly in @@ -46,28 +48,30 @@ def cg_solver(omega: complex, either use meanas.fdmath.vec() or numpy: f_1D = numpy.hstack(tuple((fi.flatten(order='F') for fi in [f_x, f_y, f_z]))) - :param omega: Complex frequency to solve at. - :param dxes: [[dx_e, dy_e, dz_e], [dx_h, dy_h, dz_h]] (complex cell sizes) - :param J: Electric current distribution (at E-field locations) - :param epsilon: Dielectric constant distribution (at E-field locations) - :param mu: Magnetic permeability distribution (at H-field locations) - :param pec: Perfect electric conductor distribution - (at E-field locations; non-zero value indicates PEC is present) - :param pmc: Perfect magnetic conductor distribution - (at H-field locations; non-zero value indicates PMC is present) - :param adjoint: If true, solves the adjoint problem. - :param max_iters: Maximum number of iterations. Default 40,000. - :param err_threshold: If (r @ r.conj()) / norm(1j * omega * J) < err_threshold, success. - Default 1e-6. - :param context: PyOpenCL context to run in. If not given, construct a new context. - :return: E-field which solves the system. Returned even if we did not converge. - """ + Args: + omega: Complex frequency to solve at. + dxes: [[dx_e, dy_e, dz_e], [dx_h, dy_h, dz_h]] (complex cell sizes) + J: Electric current distribution (at E-field locations) + epsilon: Dielectric constant distribution (at E-field locations) + mu: Magnetic permeability distribution (at H-field locations) + pec: Perfect electric conductor distribution + (at E-field locations; non-zero value indicates PEC is present) + pmc: Perfect magnetic conductor distribution + (at H-field locations; non-zero value indicates PMC is present) + adjoint: If true, solves the adjoint problem. + max_iters: Maximum number of iterations. Default 40,000. + err_threshold: If (r @ r.conj()) / norm(1j * omega * J) < err_threshold, success. + Default 1e-6. + context: PyOpenCL context to run in. If not given, construct a new context. + Returns: + E-field which solves the system. Returned even if we did not converge. + """ start_time = time.perf_counter() - b = -1j * omega * J + shape = [dd.size for dd in dxes[0]] - shape = [d.size for d in dxes[0]] + b = -1j * omega * numpy.array(J, copy=False) ''' ** In this comment, I use the following notation: @@ -96,9 +100,10 @@ def cg_solver(omega: complex, We can accomplish all this simply by conjugating everything (except J) and reversing the order of L and R ''' + epsilon = numpy.array(epsilon, copy=False) if adjoint: # Conjugate everything - dxes = [[numpy.conj(d) for d in dd] for dd in dxes] + dxes = [[numpy.conj(dd) for dd in dds] for dds in dxes] omega = numpy.conj(omega) epsilon = numpy.conj(epsilon) if mu is not None: @@ -132,7 +137,7 @@ def cg_solver(omega: complex, rho = 1.0 + 0j errs = [] - inv_dxes = [[load_field(1 / d) for d in dd] for dd in dxes] + inv_dxes = [[load_field(1 / numpy.array(dd, copy=False)) for dd in dds] for dds in dxes] oeps = load_field(-omega ** 2 * epsilon) Pl = load_field(L.diagonal()) Pr = load_field(R.diagonal()) @@ -140,17 +145,18 @@ def cg_solver(omega: complex, if mu is None: invm = load_field(numpy.array([])) else: - invm = load_field(1 / mu) + invm = load_field(1 / numpy.array(mu, copy=False)) + mu = numpy.array(mu, copy=False) if pec is None: gpec = load_field(numpy.array([]), dtype=numpy.int8) else: - gpec = load_field(pec.astype(bool), dtype=numpy.int8) + gpec = load_field(numpy.array(pec, dtype=bool, copy=False), dtype=numpy.int8) if pmc is None: gpmc = load_field(numpy.array([]), dtype=numpy.int8) else: - gpmc = load_field(pmc.astype(bool), dtype=numpy.int8) + gpmc = load_field(numpy.array(pmc, dtype=bool, copy=False), dtype=numpy.int8) ''' Generate OpenCL kernels diff --git a/opencl_fdfd/ops.py b/opencl_fdfd/ops.py index 2e0f160..32f2400 100644 --- a/opencl_fdfd/ops.py +++ b/opencl_fdfd/ops.py @@ -7,10 +7,11 @@ kernels for use by the other solvers. See kernels/ for any of the .cl files loaded in this file. """ -from typing import List, Callable +from typing import List, Callable, Union, Type, Sequence, Optional, Tuple import logging import numpy +from numpy.typing import NDArray, ArrayLike import jinja2 import pyopencl @@ -28,12 +29,17 @@ jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels')) operation = Callable[..., List[pyopencl.Event]] -def type_to_C(float_type: numpy.float32 or numpy.float64) -> str: +def type_to_C( + float_type: Type, + ) -> str: """ Returns a string corresponding to the C equivalent of a numpy type. - :param float_type: numpy type: float32, float64, complex64, complex128 - :return: string containing the corresponding C type (eg. 'double') + Args: + float_type: numpy type: float32, float64, complex64, complex128 + + Returns: + string containing the corresponding C type (eg. 'double') """ types = { numpy.float32: 'float', @@ -68,12 +74,13 @@ def ptrs(*args: str) -> List[str]: return [ctype + ' *' + s for s in args] -def create_a(context: pyopencl.Context, - shape: numpy.ndarray, - mu: bool = False, - pec: bool = False, - pmc: bool = False, - ) -> operation: +def create_a( + context: pyopencl.Context, + shape: ArrayLike, + mu: bool = False, + pec: bool = False, + pmc: bool = False, + ) -> operation: """ Return a function which performs (A @ p), where A is the FDFD wave equation for E-field. @@ -94,12 +101,15 @@ def create_a(context: pyopencl.Context, and returns a list of pyopencl.Event. - :param context: PyOpenCL context - :param shape: Dimensions of the E-field - :param mu: False iff (mu == 1) everywhere - :param pec: False iff no PEC anywhere - :param pmc: False iff no PMC anywhere - :return: Function for computing (A @ p) + Args: + context: PyOpenCL context + shape: Dimensions of the E-field + mu: False iff (mu == 1) everywhere + pec: False iff no PEC anywhere + pmc: False iff no PMC anywhere + + Returns: + Function for computing (A @ p) """ common_source = jinja_env.get_template('common.cl').render(shape=shape) @@ -113,45 +123,67 @@ def create_a(context: pyopencl.Context, Convert p to initial E (ie, apply right preconditioner and PEC) ''' p2e_source = jinja_env.get_template('p2e.cl').render(pec=pec) - P2E_kernel = ElementwiseKernel(context, - name='P2E', - preamble=preamble, - operation=p2e_source, - arguments=', '.join(ptrs('E', 'p', 'Pr') + pec_arg)) + P2E_kernel = ElementwiseKernel( + context, + name='P2E', + preamble=preamble, + operation=p2e_source, + arguments=', '.join(ptrs('E', 'p', 'Pr') + pec_arg), + ) ''' Calculate intermediate H from intermediate E ''' - e2h_source = jinja_env.get_template('e2h.cl').render(mu=mu, - pmc=pmc, - common_cl=common_source) - E2H_kernel = ElementwiseKernel(context, - name='E2H', - preamble=preamble, - operation=e2h_source, - arguments=', '.join(ptrs('E', 'H', 'inv_mu') + pmc_arg + des)) + e2h_source = jinja_env.get_template('e2h.cl').render( + mu=mu, + pmc=pmc, + common_cl=common_source, + ) + E2H_kernel = ElementwiseKernel( + context, + name='E2H', + preamble=preamble, + operation=e2h_source, + arguments=', '.join(ptrs('E', 'H', 'inv_mu') + pmc_arg + des), + ) ''' Calculate final E (including left preconditioner) ''' - h2e_source = jinja_env.get_template('h2e.cl').render(pec=pec, - common_cl=common_source) - H2E_kernel = ElementwiseKernel(context, - name='H2E', - preamble=preamble, - operation=h2e_source, - arguments=', '.join(ptrs('E', 'H', 'oeps', 'Pl') + pec_arg + dhs)) + h2e_source = jinja_env.get_template('h2e.cl').render( + pec=pec, + common_cl=common_source, + ) + H2E_kernel = ElementwiseKernel( + context, + name='H2E', + preamble=preamble, + operation=h2e_source, + arguments=', '.join(ptrs('E', 'H', 'oeps', 'Pl') + pec_arg + dhs), + ) - def spmv(E, H, p, idxes, oeps, inv_mu, pec, pmc, Pl, Pr, e): + def spmv( + E: pyopencl.array.Array, + H: pyopencl.array.Array, + p: pyopencl.array.Array, + idxes: Sequence[Sequence[pyopencl.array.Array]], + oeps: pyopencl.array.Array, + inv_mu: Optional[pyopencl.array.Array], + pec: Optional[pyopencl.array.Array], + pmc: Optional[pyopencl.array.Array], + Pl: pyopencl.array.Array, + Pr: pyopencl.array.Array, + e: List[pyopencl.Event], + ) -> List[pyopencl.Event]: e2 = P2E_kernel(E, p, Pr, pec, wait_for=e) e2 = E2H_kernel(E, H, inv_mu, pmc, *idxes[0], wait_for=[e2]) e2 = H2E_kernel(E, H, oeps, Pl, pec, *idxes[1], wait_for=[e2]) return [e2] - logger.debug('Preamble: \n{}'.format(preamble)) - logger.debug('p2e: \n{}'.format(p2e_source)) - logger.debug('e2h: \n{}'.format(e2h_source)) - logger.debug('h2e: \n{}'.format(h2e_source)) + logger.debug(f'Preamble: \n{preamble}') + logger.debug(f'p2e: \n{p2e_source}') + logger.debug(f'e2h: \n{e2h_source}') + logger.debug(f'h2e: \n{h2e_source}') return spmv @@ -167,8 +199,11 @@ def create_xr_step(context: pyopencl.Context) -> operation: after waiting for all in the list e and returns a list of pyopencl.Event - :param context: PyOpenCL context - :return: Function for performing x and r updates + Args: + context: PyOpenCL context + + Returns: + Function for performing x and r updates """ update_xr_source = ''' x[i] = add(x[i], mul(alpha, p[i])); @@ -177,19 +212,28 @@ def create_xr_step(context: pyopencl.Context) -> operation: xr_args = ', '.join(ptrs('x', 'p', 'r', 'v') + [ctype + ' alpha']) - xr_kernel = ElementwiseKernel(context, - name='XR', - preamble=preamble, - operation=update_xr_source, - arguments=xr_args) + xr_kernel = ElementwiseKernel( + context, + name='XR', + preamble=preamble, + operation=update_xr_source, + arguments=xr_args, + ) - def xr_update(x, p, r, v, alpha, e): + def xr_update( + x: pyopencl.array.Array, + p: pyopencl.array.Array, + r: pyopencl.array.Array, + v: pyopencl.array.Array, + alpha: complex, + e: List[pyopencl.Event], + ) -> List[pyopencl.Event]: return [xr_kernel(x, p, r, v, alpha, wait_for=e)] return xr_update -def create_rhoerr_step(context: pyopencl.Context) -> operation: +def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., Tuple[complex, complex]]: """ Return a function ri_update(r, e) @@ -200,8 +244,11 @@ def create_rhoerr_step(context: pyopencl.Context) -> operation: after waiting for all pyopencl.Event in the list e and returns a list of pyopencl.Event - :param context: PyOpenCL context - :return: Function for performing x and r updates + Args: + context: PyOpenCL context + + Returns: + Function for performing x and r updates """ update_ri_source = ''' @@ -213,16 +260,18 @@ def create_rhoerr_step(context: pyopencl.Context) -> operation: # Use a vector type (double3) to make the reduction simpler ri_dtype = pyopencl.array.vec.double3 - ri_kernel = ReductionKernel(context, - name='RHOERR', - preamble=preamble, - dtype_out=ri_dtype, - neutral='(double3)(0.0, 0.0, 0.0)', - map_expr=update_ri_source, - reduce_expr='a+b', - arguments=ctype + ' *r') + ri_kernel = ReductionKernel( + context, + name='RHOERR', + preamble=preamble, + dtype_out=ri_dtype, + neutral='(double3)(0.0, 0.0, 0.0)', + map_expr=update_ri_source, + reduce_expr='a+b', + arguments=ctype + ' *r', + ) - def ri_update(r, e): + def ri_update(r: pyopencl.array.Array, e: List[pyopencl.Event]) -> Tuple[complex, complex]: g = ri_kernel(r, wait_for=e).astype(ri_dtype).get() rr, ri, ii = [g[q] for q in 'xyz'] rho = rr + 2j * ri - ii @@ -242,48 +291,66 @@ def create_p_step(context: pyopencl.Context) -> operation: after waiting for all pyopencl.Event in the list e and returns a list of pyopencl.Event - :param context: PyOpenCL context - :return: Function for performing the p update + Args: + context: PyOpenCL context + + Returns: + Function for performing the p update """ update_p_source = ''' p[i] = add(r[i], mul(beta, p[i])); ''' p_args = ptrs('p', 'r') + [ctype + ' beta'] - p_kernel = ElementwiseKernel(context, - name='P', - preamble=preamble, - operation=update_p_source, - arguments=', '.join(p_args)) + p_kernel = ElementwiseKernel( + context, + name='P', + preamble=preamble, + operation=update_p_source, + arguments=', '.join(p_args), + ) - def p_update(p, r, beta, e): + def p_update( + p: pyopencl.array.Array, + r: pyopencl.array.Array, + beta: complex, + e: List[pyopencl.Event]) -> List[pyopencl.Event]: return [p_kernel(p, r, beta, wait_for=e)] return p_update -def create_dot(context: pyopencl.Context) -> operation: +def create_dot(context: pyopencl.Context) -> Callable[..., complex]: """ Return a function for performing the dot product p @ v with the signature - dot(p, v, e) -> float + dot(p, v, e) -> complex - :param context: PyOpenCL context - :return: Function for performing the dot product + Args: + context: PyOpenCL context + + Returns: + Function for performing the dot product """ dot_dtype = numpy.complex128 - dot_kernel = ReductionKernel(context, - name='dot', - preamble=preamble, - dtype_out=dot_dtype, - neutral='zero', - map_expr='mul(p[i], v[i])', - reduce_expr='add(a, b)', - arguments=ptrs('p', 'v')) + dot_kernel = ReductionKernel( + context, + name='dot', + preamble=preamble, + dtype_out=dot_dtype, + neutral='zero', + map_expr='mul(p[i], v[i])', + reduce_expr='add(a, b)', + arguments=ptrs('p', 'v'), + ) - def dot(p, v, e): + def dot( + p: pyopencl.array.Array, + v: pyopencl.array.Array, + e: List[pyopencl.Event], + ) -> complex: g = dot_kernel(p, v, wait_for=e) return g.get() @@ -304,8 +371,11 @@ def create_a_csr(context: pyopencl.Context) -> operation: The function waits on all the pyopencl.Event in e before running, and returns a list of pyopencl.Event. - :param context: PyOpenCL context - :return: Function for sparse (M @ v) operation where M is in CSR format + Args: + context: PyOpenCL context + + Returns: + Function for sparse (M @ v) operation where M is in CSR format """ spmv_source = ''' int start = m_row_ptr[i]; @@ -326,13 +396,20 @@ def create_a_csr(context: pyopencl.Context) -> operation: m_args = 'int *m_row_ptr, int *m_col_ind, ' + ctype + ' *m_data' v_in_args = ctype + ' *v_in' - spmv_kernel = ElementwiseKernel(context, - name='csr_spmv', - preamble=preamble, - operation=spmv_source, - arguments=', '.join((v_out_args, m_args, v_in_args))) + spmv_kernel = ElementwiseKernel( + context, + name='csr_spmv', + preamble=preamble, + operation=spmv_source, + arguments=', '.join((v_out_args, m_args, v_in_args)), + ) - def spmv(v_out, m, v_in, e): + def spmv( + v_out, + m, + v_in, + e: List[pyopencl.Event], + ) -> List[pyopencl.Event]: return [spmv_kernel(v_out, m.row_ptr, m.col_ind, m.data, v_in, wait_for=e)] return spmv From 3d8054ba4030a4f67b96ee737209a5ac820fce57 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Sun, 20 Nov 2022 21:57:54 -0800 Subject: [PATCH 11/12] use f-strings everywhere --- opencl_fdfd/csr.py | 20 ++++++++++---------- opencl_fdfd/main.py | 19 ++++++++++--------- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/opencl_fdfd/csr.py b/opencl_fdfd/csr.py index df95822..0f5a837 100644 --- a/opencl_fdfd/csr.py +++ b/opencl_fdfd/csr.py @@ -114,11 +114,11 @@ def cg( _, err2 = rhoerr_step(r, []) b_norm = numpy.sqrt(err2) - logging.debug('b_norm check: ', b_norm) + logging.debug(f'b_norm check: {b_norm}') success = False for k in range(max_iters): - logging.debug('[{:06d}] rho {:.4} alpha {:4.4}'.format(k, rho, alpha)) + logging.debug(f'[{k:06d}] rho {rho:.4} alpha {alpha:4.4}') rho_prev = rho e = xr_step(x, p, r, v, alpha, []) @@ -126,7 +126,7 @@ def cg( errs += [numpy.sqrt(err2) / b_norm] - logging.debug('err {}'.format(errs[-1])) + logging.debug(f'err {errs[-1]}') if errs[-1] < err_threshold: success = True @@ -136,8 +136,8 @@ def cg( e = a_step(v, m, p, e) alpha = rho / dot(p, v, e) - if verbose and k % 1000 == 0: - logging.info('iteration {}'.format(k)) + if k % 1000 == 0: + logger.info(f'iteration {k}') ''' Done solving @@ -150,12 +150,12 @@ def cg( logging.info('Solve success') else: logging.warning('Solve failure') - logging.info('{} iterations in {} sec: {} iterations/sec \ - '.format(k, time_elapsed, k / time_elapsed)) - logging.debug('final error {}'.format(errs[-1])) - logging.debug('overhead {} sec'.format(start_time2 - start_time)) + logging.info(f'{k} iterations in {time_elapsed} sec: {k / time_elapsed} iterations/sec') + logging.debug(f'final error {errs[-1]}') + logging.debug(f'overhead {start_time2 - start_time} sec') - logging.info('Final residual: {}'.format(norm(A @ x - b) / norm(b))) + residual = norm(A @ x - b) / norm(b) + logging.info(f'Final residual: {residual}') return x diff --git a/opencl_fdfd/main.py b/opencl_fdfd/main.py index d6e0d83..337b4e0 100644 --- a/opencl_fdfd/main.py +++ b/opencl_fdfd/main.py @@ -179,13 +179,13 @@ def cg_solver( _, err2 = rhoerr_step(r, []) b_norm = numpy.sqrt(err2) - logging.debug('b_norm check: {}'.format(b_norm)) + logging.debug(f'b_norm check: {b_norm}') success = False for k in range(max_iters): do_print = (k % 100 == 0) if do_print: - logger.debug('[{:06d}] rho {:.4} alpha {:4.4}'.format(k, rho, alpha)) + logger.debug(f'[{k:06d}] rho {rho:.4} alpha {alpha:4.4}') rho_prev = rho e = xr_step(x, p, r, v, alpha, []) @@ -194,7 +194,7 @@ def cg_solver( errs += [numpy.sqrt(err2) / b_norm] if do_print: - logger.debug('err {}'.format(errs[-1])) + logger.debug(f'err {errs[-1]}') if errs[-1] < err_threshold: success = True @@ -205,7 +205,7 @@ def cg_solver( alpha = rho / dot(p, v, e) if k % 1000 == 0: - logger.info('iteration {}'.format(k)) + logger.info(f'iteration {k}') ''' Done solving @@ -222,15 +222,16 @@ def cg_solver( logger.info('Solve success') else: logger.warning('Solve failure') - logger.info('{} iterations in {} sec: {} iterations/sec \ - '.format(k, time_elapsed, k / time_elapsed)) - logger.debug('final error {}'.format(errs[-1])) - logger.debug('overhead {} sec'.format(start_time2 - start_time)) + logger.info(f'{k} iterations in {time_elapsed} sec: {k / time_elapsed} iterations/sec') + logger.debug(f'final error {errs[-1]}') + logger.debug(f'overhead {start_time2 - start_time} sec') A0 = meanas.fdfd.operators.e_full(omega, dxes, epsilon, mu).tocsr() if adjoint: # Remember we conjugated all the contents of A earlier A0 = A0.T - logger.info('Post-everything residual: {}'.format(norm(A0 @ x - b) / norm(b))) + + residual = norm(A0 @ x - b) / norm(b) + logger.info(f'Post-everything residual: {residual}') return x From b8ea8591065eb4f0e66aa815cc99ff9c5e87f991 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Sun, 20 Nov 2022 21:58:48 -0800 Subject: [PATCH 12/12] bump version to 0.4 --- opencl_fdfd/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opencl_fdfd/__init__.py b/opencl_fdfd/__init__.py index f5a639b..a6f9b28 100644 --- a/opencl_fdfd/__init__.py +++ b/opencl_fdfd/__init__.py @@ -40,5 +40,5 @@ from .main import cg_solver __author__ = 'Jan Petykiewicz' -__version__ = '0.3' +__version__ = '0.4' version = __version__