diff --git a/opencl_fdfd/__init__.py b/opencl_fdfd/__init__.py index bd4a6ff..a6f9b28 100644 --- a/opencl_fdfd/__init__.py +++ b/opencl_fdfd/__init__.py @@ -37,7 +37,7 @@ - jinja2 """ -from .main import cg_solver as cg_solver +from .main import cg_solver __author__ = 'Jan Petykiewicz' __version__ = '0.4' diff --git a/opencl_fdfd/csr.py b/opencl_fdfd/csr.py index b84eec0..0f5a837 100644 --- a/opencl_fdfd/csr.py +++ b/opencl_fdfd/csr.py @@ -14,23 +14,23 @@ satisfy the constraints for the 'conjugate gradient' algorithm (positive definite, symmetric) and some that don't. """ -from typing import Any, TYPE_CHECKING +from typing import Dict, Any, Optional import time import logging import numpy from numpy.typing import NDArray, ArrayLike from numpy.linalg import norm -from numpy import complexfloating import pyopencl import pyopencl.array +import scipy + import meanas.fdfd.solvers from . import ops -if TYPE_CHECKING: - import scipy +__author__ = 'Jan Petykiewicz' logger = logging.getLogger(__name__) @@ -58,9 +58,9 @@ def cg( b: ArrayLike, max_iters: int = 10000, err_threshold: float = 1e-6, - context: pyopencl.Context | None = None, - queue: pyopencl.CommandQueue | None = None, - ) -> NDArray[complexfloating]: + context: Optional[pyopencl.Context] = None, + queue: Optional[pyopencl.CommandQueue] = None, + ) -> NDArray: """ General conjugate-gradient solver for sparse matrices, where A @ x = b. @@ -84,10 +84,10 @@ def cg( if queue is None: queue = pyopencl.CommandQueue(context) - def load_field(v: NDArray[numpy.complexfloating], dtype: type = numpy.complex128) -> pyopencl.array.Array: + def load_field(v, dtype=numpy.complex128): return pyopencl.array.to_device(queue, v.astype(dtype)) - r = load_field(numpy.asarray(b)) + r = load_field(b) x = pyopencl.array.zeros_like(r) v = pyopencl.array.zeros_like(r) p = pyopencl.array.zeros_like(r) @@ -98,18 +98,18 @@ def cg( m = CSRMatrix(queue, A) - # - # Generate OpenCL kernels - # + ''' + Generate OpenCL kernels + ''' a_step = ops.create_a_csr(context) xr_step = ops.create_xr_step(context) rhoerr_step = ops.create_rhoerr_step(context) p_step = ops.create_p_step(context) dot = ops.create_dot(context) - # - # Start the solve - # + ''' + Start the solve + ''' start_time2 = time.perf_counter() _, err2 = rhoerr_step(r, []) @@ -139,9 +139,9 @@ def cg( if k % 1000 == 0: logger.info(f'iteration {k}') - # - # Done solving - # + ''' + Done solving + ''' time_elapsed = time.perf_counter() - start_time x = x.get() @@ -160,9 +160,9 @@ def cg( def fdfd_cg_solver( - solver_opts: dict[str, Any] | None = None, - **fdfd_args, - ) -> NDArray[complexfloating]: + solver_opts: Optional[Dict[str, Any]] = None, + **fdfd_args + ) -> NDArray: """ Conjugate gradient FDFD solver using CSR sparse matrices, mainly for testing and development since it's much slower than the solver in main.py. diff --git a/opencl_fdfd/main.py b/opencl_fdfd/main.py index 9e57395..337b4e0 100644 --- a/opencl_fdfd/main.py +++ b/opencl_fdfd/main.py @@ -5,13 +5,14 @@ This file holds the default FDFD solver, which uses an E-field wave operator implemented directly as OpenCL arithmetic (rather than as a matrix). """ + +from typing import List, Optional, cast import time import logging import numpy from numpy.typing import NDArray, ArrayLike from numpy.linalg import norm -from numpy import floating, complexfloating import pyopencl import pyopencl.array @@ -20,21 +21,23 @@ import meanas.fdfd.operators from . import ops +__author__ = 'Jan Petykiewicz' + logger = logging.getLogger(__name__) def cg_solver( omega: complex, - dxes: list[list[NDArray[floating | complexfloating]]], + dxes: List[List[NDArray]], J: ArrayLike, epsilon: ArrayLike, - mu: ArrayLike | None = None, - pec: ArrayLike | None = None, - pmc: ArrayLike | None = None, + mu: Optional[ArrayLike] = None, + pec: Optional[ArrayLike] = None, + pmc: Optional[ArrayLike] = None, adjoint: bool = False, max_iters: int = 40000, err_threshold: float = 1e-6, - context: pyopencl.Context | None = None, + context: Optional[pyopencl.Context] = None, ) -> NDArray: """ OpenCL FDFD solver using the iterative conjugate gradient (cg) method @@ -68,7 +71,7 @@ def cg_solver( shape = [dd.size for dd in dxes[0]] - b = -1j * omega * numpy.asarray(J) + b = -1j * omega * numpy.array(J, copy=False) ''' ** In this comment, I use the following notation: @@ -97,8 +100,7 @@ def cg_solver( We can accomplish all this simply by conjugating everything (except J) and reversing the order of L and R ''' - epsilon = numpy.asarray(epsilon) - + epsilon = numpy.array(epsilon, copy=False) if adjoint: # Conjugate everything dxes = [[numpy.conj(dd) for dd in dds] for dds in dxes] @@ -106,20 +108,23 @@ def cg_solver( epsilon = numpy.conj(epsilon) if mu is not None: mu = numpy.conj(mu) - assert isinstance(epsilon, NDArray[floating] | NDArray[complexfloating]) L, R = meanas.fdfd.operators.e_full_preconditioners(dxes) - b_preconditioned = (R if adjoint else L) @ b - # - # Allocate GPU memory and load in data - # + if adjoint: + b_preconditioned = R @ b + else: + b_preconditioned = L @ b + + ''' + Allocate GPU memory and load in data + ''' if context is None: context = pyopencl.create_some_context(interactive=True) queue = pyopencl.CommandQueue(context) - def load_field(v: NDArray[complexfloating | floating], dtype: type = numpy.complex128) -> pyopencl.array.Array: + def load_field(v, dtype=numpy.complex128): return pyopencl.array.to_device(queue, v.astype(dtype)) r = load_field(b_preconditioned) # load preconditioned b into r @@ -132,31 +137,31 @@ def cg_solver( rho = 1.0 + 0j errs = [] - inv_dxes = [[load_field(1 / numpy.asarray(dd)) for dd in dds] for dds in dxes] - oeps = load_field(-omega * omega * epsilon) + inv_dxes = [[load_field(1 / numpy.array(dd, copy=False)) for dd in dds] for dds in dxes] + oeps = load_field(-omega ** 2 * epsilon) Pl = load_field(L.diagonal()) Pr = load_field(R.diagonal()) if mu is None: invm = load_field(numpy.array([])) else: - invm = load_field(1 / numpy.asarray(mu)) - mu = numpy.asarray(mu) + invm = load_field(1 / numpy.array(mu, copy=False)) + mu = numpy.array(mu, copy=False) if pec is None: gpec = load_field(numpy.array([]), dtype=numpy.int8) else: - gpec = load_field(numpy.asarray(pec, dtype=bool), dtype=numpy.int8) + gpec = load_field(numpy.array(pec, dtype=bool, copy=False), dtype=numpy.int8) if pmc is None: gpmc = load_field(numpy.array([]), dtype=numpy.int8) else: - gpmc = load_field(numpy.asarray(pmc, dtype=bool), dtype=numpy.int8) + gpmc = load_field(numpy.array(pmc, dtype=bool, copy=False), dtype=numpy.int8) - # - # Generate OpenCL kernels - # - has_mu, has_pec, has_pmc = (qq is not None for qq in (mu, pec, pmc)) + ''' + Generate OpenCL kernels + ''' + has_mu, has_pec, has_pmc = [q is not None for q in (mu, pec, pmc)] a_step_full = ops.create_a(context, shape, has_mu, has_pec, has_pmc) xr_step = ops.create_xr_step(context) @@ -164,17 +169,12 @@ def cg_solver( p_step = ops.create_p_step(context) dot = ops.create_dot(context) - def a_step( - E: pyopencl.array.Array, - H: pyopencl.array.Array, - p: pyopencl.array.Array, - events: list[pyopencl.Event], - ) -> list[pyopencl.Event]: + def a_step(E, H, p, events): return a_step_full(E, H, p, inv_dxes, oeps, invm, gpec, gpmc, Pl, Pr, events) - # - # Start the solve - # + ''' + Start the solve + ''' start_time2 = time.perf_counter() _, err2 = rhoerr_step(r, []) @@ -207,13 +207,16 @@ def cg_solver( if k % 1000 == 0: logger.info(f'iteration {k}') - # - # Done solving - # + ''' + Done solving + ''' time_elapsed = time.perf_counter() - start_time # Undo preconditioners - x = ((Pl if adjoint else Pr) * x).get() + if adjoint: + x = (Pl * x).get() + else: + x = (Pr * x).get() if success: logger.info('Solve success') diff --git a/opencl_fdfd/ops.py b/opencl_fdfd/ops.py index 335392b..b0e4108 100644 --- a/opencl_fdfd/ops.py +++ b/opencl_fdfd/ops.py @@ -7,11 +7,11 @@ kernels for use by the other solvers. See kernels/ for any of the .cl files loaded in this file. """ -from collections.abc import Callable, Sequence +from typing import List, Callable, Union, Type, Sequence, Optional, Tuple import logging import numpy -from numpy.typing import ArrayLike +from numpy.typing import NDArray, ArrayLike import jinja2 import pyopencl @@ -20,25 +20,17 @@ from pyopencl.elementwise import ElementwiseKernel from pyopencl.reduction import ReductionKernel -from .csr import CSRMatrix - - logger = logging.getLogger(__name__) - -class FDFDError(Exception): - """ Custom error for opencl_fdfd """ - pass - # Create jinja2 env on module load jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels')) # Return type for the create_opname(...) functions -operation = Callable[..., list[pyopencl.Event]] +operation = Callable[..., List[pyopencl.Event]] def type_to_C( - float_type: type[numpy.floating | numpy.complexfloating], + float_type: Type, ) -> str: """ Returns a string corresponding to the C equivalent of a numpy type. @@ -57,30 +49,29 @@ def type_to_C( numpy.complex128: 'cdouble_t', } if float_type not in types: - raise FDFDError('Unsupported type') + raise Exception('Unsupported type') return types[float_type] - # Type names ctype = type_to_C(numpy.complex128) ctype_bare = 'cdouble' # Preamble for all OpenCL code -preamble = f''' +preamble = ''' #define PYOPENCL_DEFINE_CDOUBLE #include //Defines to clean up operation and type names -#define ctype {ctype_bare}_t -#define zero {ctype_bare}_new(0.0, 0.0) -#define add {ctype_bare}_add -#define sub {ctype_bare}_sub -#define mul {ctype_bare}_mul -''' +#define ctype {ctype}_t +#define zero {ctype}_new(0.0, 0.0) +#define add {ctype}_add +#define sub {ctype}_sub +#define mul {ctype}_mul +'''.format(ctype=ctype_bare) -def ptrs(*args: str) -> list[str]: +def ptrs(*args: str) -> List[str]: return [ctype + ' *' + s for s in args] @@ -129,9 +120,9 @@ def create_a( des = [ctype + ' *inv_de' + a for a in 'xyz'] dhs = [ctype + ' *inv_dh' + a for a in 'xyz'] - # - # Convert p to initial E (ie, apply right preconditioner and PEC) - # + ''' + Convert p to initial E (ie, apply right preconditioner and PEC) + ''' p2e_source = jinja_env.get_template('p2e.cl').render(pec=pec) P2E_kernel = ElementwiseKernel( context, @@ -141,9 +132,9 @@ def create_a( arguments=', '.join(ptrs('E', 'p', 'Pr') + pec_arg), ) - # - # Calculate intermediate H from intermediate E - # + ''' + Calculate intermediate H from intermediate E + ''' e2h_source = jinja_env.get_template('e2h.cl').render( mu=mu, pmc=pmc, @@ -157,9 +148,9 @@ def create_a( arguments=', '.join(ptrs('E', 'H', 'inv_mu') + pmc_arg + des), ) - # - # Calculate final E (including left preconditioner) - # + ''' + Calculate final E (including left preconditioner) + ''' h2e_source = jinja_env.get_template('h2e.cl').render( pec=pec, common_cl=common_source, @@ -178,13 +169,13 @@ def create_a( p: pyopencl.array.Array, idxes: Sequence[Sequence[pyopencl.array.Array]], oeps: pyopencl.array.Array, - inv_mu: pyopencl.array.Array | None, - pec: pyopencl.array.Array | None, - pmc: pyopencl.array.Array | None, + inv_mu: Optional[pyopencl.array.Array], + pec: Optional[pyopencl.array.Array], + pmc: Optional[pyopencl.array.Array], Pl: pyopencl.array.Array, Pr: pyopencl.array.Array, - e: list[pyopencl.Event], - ) -> list[pyopencl.Event]: + e: List[pyopencl.Event], + ) -> List[pyopencl.Event]: e2 = P2E_kernel(E, p, Pr, pec, wait_for=e) e2 = E2H_kernel(E, H, inv_mu, pmc, *idxes[0], wait_for=[e2]) e2 = H2E_kernel(E, H, oeps, Pl, pec, *idxes[1], wait_for=[e2]) @@ -236,14 +227,14 @@ def create_xr_step(context: pyopencl.Context) -> operation: r: pyopencl.array.Array, v: pyopencl.array.Array, alpha: complex, - e: list[pyopencl.Event], - ) -> list[pyopencl.Event]: + e: List[pyopencl.Event], + ) -> List[pyopencl.Event]: return [xr_kernel(x, p, r, v, alpha, wait_for=e)] return xr_update -def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., tuple[complex, complex]]: +def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., Tuple[complex, complex]]: """ Return a function ri_update(r, e) @@ -281,9 +272,9 @@ def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., tuple[complex arguments=ctype + ' *r', ) - def ri_update(r: pyopencl.array.Array, e: list[pyopencl.Event]) -> tuple[complex, complex]: + def ri_update(r: pyopencl.array.Array, e: List[pyopencl.Event]) -> Tuple[complex, complex]: g = ri_kernel(r, wait_for=e).astype(ri_dtype).get() - rr, ri, ii = (g[qq] for qq in 'xyz') + rr, ri, ii = [g[q] for q in 'xyz'] rho = rr + 2j * ri - ii err = rr + ii return rho, err @@ -324,7 +315,7 @@ def create_p_step(context: pyopencl.Context) -> operation: p: pyopencl.array.Array, r: pyopencl.array.Array, beta: complex, - e: list[pyopencl.Event]) -> list[pyopencl.Event]: + e: List[pyopencl.Event]) -> List[pyopencl.Event]: return [p_kernel(p, r, beta, wait_for=e)] return p_update @@ -359,7 +350,7 @@ def create_dot(context: pyopencl.Context) -> Callable[..., complex]: def dot( p: pyopencl.array.Array, v: pyopencl.array.Array, - e: list[pyopencl.Event], + e: List[pyopencl.Event], ) -> complex: g = dot_kernel(p, v, wait_for=e) return g.get() @@ -415,11 +406,11 @@ def create_a_csr(context: pyopencl.Context) -> operation: ) def spmv( - v_out: pyopencl.array.Array, - m: CSRMatrix, - v_in: pyopencl.array.Array, - e: list[pyopencl.Event], - ) -> list[pyopencl.Event]: + v_out, + m, + v_in, + e: List[pyopencl.Event], + ) -> List[pyopencl.Event]: return [spmv_kernel(v_out, m.row_ptr, m.col_ind, m.data, v_in, wait_for=e)] return spmv diff --git a/pyproject.toml b/pyproject.toml index 261c9c7..a9430f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,10 +35,10 @@ classifiers = [ "License :: OSI Approved :: GNU Affero General Public License v3", "Topic :: Scientific/Engineering", ] -requires-python = ">=3.11" +requires-python = ">=3.8" dynamic = ["version"] dependencies = [ - "numpy>=1.26", + "numpy~=1.21", "pyopencl", "jinja2", "meanas>=0.5", @@ -46,51 +46,3 @@ dependencies = [ [tool.hatch.version] path = "opencl_fdfd/__init__.py" - - -[tool.ruff] -exclude = [ - ".git", - "dist", - ] -line-length = 145 -indent-width = 4 -lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" -lint.select = [ - "NPY", "E", "F", "W", "B", "ANN", "UP", "SLOT", "SIM", "LOG", - "C4", "ISC", "PIE", "PT", "RET", "TCH", "PTH", "INT", - "ARG", "PL", "R", "TRY", - "G010", "G101", "G201", "G202", - "Q002", "Q003", "Q004", - ] -lint.ignore = [ - #"ANN001", # No annotation - "ANN002", # *args - "ANN003", # **kwargs - "ANN401", # Any - "ANN101", # self: Self - "SIM108", # single-line if / else assignment - "RET504", # x=y+z; return x - "PIE790", # unnecessary pass - "ISC003", # non-implicit string concatenation - "C408", # dict(x=y) instead of {'x': y} - "PLR09", # Too many xxx - "PLR2004", # magic number - "PLC0414", # import x as x - "TRY003", # Long exception message - ] - - -[[tool.mypy.overrides]] -module = [ - "scipy", - "scipy.optimize", - "scipy.linalg", - "scipy.sparse", - "scipy.sparse.linalg", - "pyopencl", - "pyopencl.array", - "pyopencl.elementwise", - "pyopencl.reduction", - ] -ignore_missing_imports = true