5 changed files with 104 additions and 159 deletions
--- a/opencl_fdfd/init.py
+++ b/opencl_fdfd/init.py
@ -37,7 +37,7 @@
    - jinja2
 """
-from .main import cg_solver as cg_solver
+from .main import cg_solver
 __author__ = 'Jan Petykiewicz'
 __version__ = '0.4'
--- a/opencl_fdfd/csr.py
+++ b/opencl_fdfd/csr.py
@ -14,23 +14,23 @@ satisfy the constraints for the 'conjugate gradient' algorithm
 (positive definite, symmetric) and some that don't.
 """
-from typing import Any, TYPE_CHECKING
+from typing import Dict, Any, Optional
 import time
 import logging
 import numpy
 from numpy.typing import NDArray, ArrayLike
 from numpy.linalg import norm
 from numpy import complexfloating
 import pyopencl
 import pyopencl.array
 import scipy
 import meanas.fdfd.solvers
 from . import ops
 if TYPE_CHECKING:
    import scipy
 __author__ = 'Jan Petykiewicz'
 logger = logging.getLogger(__name__)
@ -58,9 +58,9 @@ def cg(
        b: ArrayLike,
        max_iters: int = 10000,
        err_threshold: float = 1e-6,
-        context: pyopencl.Context | None = None,
+        context: Optional[pyopencl.Context] = None,
-        queue: pyopencl.CommandQueue | None = None,
+        queue: Optional[pyopencl.CommandQueue] = None,
-        ) -> NDArray[complexfloating]:
+        ) -> NDArray:
    """
    General conjugate-gradient solver for sparse matrices, where A @ x = b.
@ -84,10 +84,10 @@ def cg(
    if queue is None:
        queue = pyopencl.CommandQueue(context)
-    def load_field(v: NDArray[numpy.complexfloating], dtype: type = numpy.complex128) -> pyopencl.array.Array:
+    def load_field(v, dtype=numpy.complex128):
        return pyopencl.array.to_device(queue, v.astype(dtype))
-    r = load_field(numpy.asarray(b))
+    r = load_field(b)
    x = pyopencl.array.zeros_like(r)
    v = pyopencl.array.zeros_like(r)
    p = pyopencl.array.zeros_like(r)
@ -98,18 +98,18 @@ def cg(
    m = CSRMatrix(queue, A)
-    #
+    '''
-    # Generate OpenCL kernels
+    Generate OpenCL kernels
-    #
+    '''
    a_step = ops.create_a_csr(context)
    xr_step = ops.create_xr_step(context)
    rhoerr_step = ops.create_rhoerr_step(context)
    p_step = ops.create_p_step(context)
    dot = ops.create_dot(context)
-    #
+    '''
-    # Start the solve
+    Start the solve
-    #
+    '''
    start_time2 = time.perf_counter()
    _, err2 = rhoerr_step(r, [])
@ -139,9 +139,9 @@ def cg(
        if k % 1000 == 0:
            logger.info(f'iteration {k}')
-    #
+    '''
-    # Done solving
+    Done solving
-    #
+    '''
    time_elapsed = time.perf_counter() - start_time
    x = x.get()
@ -160,9 +160,9 @@ def cg(
 def fdfd_cg_solver(
-        solver_opts: dict[str, Any] | None = None,
+        solver_opts: Optional[Dict[str, Any]] = None,
-        **fdfd_args,
+        **fdfd_args
-        ) -> NDArray[complexfloating]:
+        ) -> NDArray:
    """
    Conjugate gradient FDFD solver using CSR sparse matrices, mainly for
     testing and development since it's much slower than the solver in main.py.
--- a/opencl_fdfd/main.py
+++ b/opencl_fdfd/main.py
@ -5,13 +5,14 @@ This file holds the default FDFD solver, which uses an E-field wave
 operator implemented directly as OpenCL arithmetic (rather than as
 a matrix).
 """
 from typing import List, Optional, cast
 import time
 import logging
 import numpy
 from numpy.typing import NDArray, ArrayLike
 from numpy.linalg import norm
 from numpy import floating, complexfloating
 import pyopencl
 import pyopencl.array
@ -20,21 +21,23 @@ import meanas.fdfd.operators
 from . import ops
 __author__ = 'Jan Petykiewicz'
 logger = logging.getLogger(__name__)
 def cg_solver(
        omega: complex,
-        dxes: list[list[NDArray[floating | complexfloating]]],
+        dxes: List[List[NDArray]],
        J: ArrayLike,
        epsilon: ArrayLike,
-        mu: ArrayLike | None = None,
+        mu: Optional[ArrayLike] = None,
-        pec: ArrayLike | None = None,
+        pec: Optional[ArrayLike] = None,
-        pmc: ArrayLike | None = None,
+        pmc: Optional[ArrayLike] = None,
        adjoint: bool = False,
        max_iters: int = 40000,
        err_threshold: float = 1e-6,
-        context: pyopencl.Context | None = None,
+        context: Optional[pyopencl.Context] = None,
        ) -> NDArray:
    """
    OpenCL FDFD solver using the iterative conjugate gradient (cg) method
@ -68,7 +71,7 @@ def cg_solver(
    shape = [dd.size for dd in dxes[0]]
-    b = -1j * omega * numpy.asarray(J)
+    b = -1j * omega * numpy.array(J, copy=False)
    '''
        ** In this comment, I use the following notation:
@ -97,8 +100,7 @@ def cg_solver(
        We can accomplish all this simply by conjugating everything (except J) and
         reversing the order of L and R
    '''
-    epsilon = numpy.asarray(epsilon)
+    epsilon = numpy.array(epsilon, copy=False)
    if adjoint:
        # Conjugate everything
        dxes = [[numpy.conj(dd) for dd in dds] for dds in dxes]
@ -106,20 +108,23 @@ def cg_solver(
        epsilon = numpy.conj(epsilon)
        if mu is not None:
            mu = numpy.conj(mu)
    assert isinstance(epsilon, NDArray[floating] | NDArray[complexfloating])
    L, R = meanas.fdfd.operators.e_full_preconditioners(dxes)
    b_preconditioned = (R if adjoint else L) @ b
-    #
+    if adjoint:
-    # Allocate GPU memory and load in data
+        b_preconditioned = R @ b
-    #
+    else:
        b_preconditioned = L @ b
    '''
        Allocate GPU memory and load in data
    '''
    if context is None:
        context = pyopencl.create_some_context(interactive=True)
    queue = pyopencl.CommandQueue(context)
-    def load_field(v: NDArray[complexfloating | floating], dtype: type = numpy.complex128) -> pyopencl.array.Array:
+    def load_field(v, dtype=numpy.complex128):
        return pyopencl.array.to_device(queue, v.astype(dtype))
    r = load_field(b_preconditioned)  # load preconditioned b into r
@ -132,31 +137,31 @@ def cg_solver(
    rho = 1.0 + 0j
    errs = []
-    inv_dxes = [[load_field(1 / numpy.asarray(dd)) for dd in dds] for dds in dxes]
+    inv_dxes = [[load_field(1 / numpy.array(dd, copy=False)) for dd in dds] for dds in dxes]
-    oeps = load_field(-omega * omega * epsilon)
+    oeps = load_field(-omega ** 2 * epsilon)
    Pl = load_field(L.diagonal())
    Pr = load_field(R.diagonal())
    if mu is None:
        invm = load_field(numpy.array([]))
    else:
-        invm = load_field(1 / numpy.asarray(mu))
+        invm = load_field(1 / numpy.array(mu, copy=False))
-        mu = numpy.asarray(mu)
+        mu = numpy.array(mu, copy=False)
    if pec is None:
        gpec = load_field(numpy.array([]), dtype=numpy.int8)
    else:
-        gpec = load_field(numpy.asarray(pec, dtype=bool), dtype=numpy.int8)
+        gpec = load_field(numpy.array(pec, dtype=bool, copy=False), dtype=numpy.int8)
    if pmc is None:
        gpmc = load_field(numpy.array([]), dtype=numpy.int8)
    else:
-        gpmc = load_field(numpy.asarray(pmc, dtype=bool), dtype=numpy.int8)
+        gpmc = load_field(numpy.array(pmc, dtype=bool, copy=False), dtype=numpy.int8)
-    #
+    '''
-    # Generate OpenCL kernels
+    Generate OpenCL kernels
-    #
+    '''
-    has_mu, has_pec, has_pmc = (qq is not None for qq in (mu, pec, pmc))
+    has_mu, has_pec, has_pmc = [q is not None for q in (mu, pec, pmc)]
    a_step_full = ops.create_a(context, shape, has_mu, has_pec, has_pmc)
    xr_step = ops.create_xr_step(context)
@ -164,17 +169,12 @@ def cg_solver(
    p_step = ops.create_p_step(context)
    dot = ops.create_dot(context)
-    def a_step(
+    def a_step(E, H, p, events):
            E: pyopencl.array.Array,
            H: pyopencl.array.Array,
            p: pyopencl.array.Array,
            events: list[pyopencl.Event],
            ) -> list[pyopencl.Event]:
        return a_step_full(E, H, p, inv_dxes, oeps, invm, gpec, gpmc, Pl, Pr, events)
-    #
+    '''
-    # Start the solve
+    Start the solve
-    #
+    '''
    start_time2 = time.perf_counter()
    _, err2 = rhoerr_step(r, [])
@ -207,13 +207,16 @@ def cg_solver(
        if k % 1000 == 0:
            logger.info(f'iteration {k}')
-    #
+    '''
-    # Done solving
+    Done solving
-    #
+    '''
    time_elapsed = time.perf_counter() - start_time
    # Undo preconditioners
-    x = ((Pl if adjoint else Pr) * x).get()
+    if adjoint:
        x = (Pl * x).get()
    else:
        x = (Pr * x).get()
    if success:
        logger.info('Solve success')
--- a/opencl_fdfd/ops.py
+++ b/opencl_fdfd/ops.py
@ -7,11 +7,11 @@ kernels for use by the other solvers.
 See kernels/ for any of the .cl files loaded in this file.
 """
-from collections.abc import Callable, Sequence
+from typing import List, Callable, Union, Type, Sequence, Optional, Tuple
 import logging
 import numpy
-from numpy.typing import ArrayLike
+from numpy.typing import NDArray, ArrayLike
 import jinja2
 import pyopencl
@ -20,25 +20,17 @@ from pyopencl.elementwise import ElementwiseKernel
 from pyopencl.reduction import ReductionKernel
 from .csr import CSRMatrix
 logger = logging.getLogger(__name__)
 class FDFDError(Exception):
    """ Custom error for opencl_fdfd """
    pass
 # Create jinja2 env on module load
 jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
 # Return type for the create_opname(...) functions
-operation = Callable[..., list[pyopencl.Event]]
+operation = Callable[..., List[pyopencl.Event]]
 def type_to_C(
-        float_type: type[numpy.floating | numpy.complexfloating],
+        float_type: Type,
        ) -> str:
    """
    Returns a string corresponding to the C equivalent of a numpy type.
@ -50,37 +42,35 @@ def type_to_C(
        string containing the corresponding C type (eg. 'double')
    """
    types = {
        numpy.float16: 'half',
        numpy.float32: 'float',
        numpy.float64: 'double',
        numpy.complex64: 'cfloat_t',
        numpy.complex128: 'cdouble_t',
    }
    if float_type not in types:
-        raise FDFDError('Unsupported type')
+        raise Exception('Unsupported type')
    return types[float_type]
 # Type names
 ctype = type_to_C(numpy.complex128)
 ctype_bare = 'cdouble'
 # Preamble for all OpenCL code
-preamble = f'''
+preamble = '''
 #define PYOPENCL_DEFINE_CDOUBLE
 #include <pyopencl-complex.h>
 //Defines to clean up operation and type names
-#define ctype {ctype_bare}_t
+#define ctype {ctype}_t
-#define zero {ctype_bare}_new(0.0, 0.0)
+#define zero {ctype}_new(0.0, 0.0)
-#define add {ctype_bare}_add
+#define add {ctype}_add
-#define sub {ctype_bare}_sub
+#define sub {ctype}_sub
-#define mul {ctype_bare}_mul
+#define mul {ctype}_mul
-'''
+'''.format(ctype=ctype_bare)
-def ptrs(*args: str) -> list[str]:
+def ptrs(*args: str) -> List[str]:
    return [ctype + ' *' + s for s in args]
@ -129,9 +119,9 @@ def create_a(
    des = [ctype + ' *inv_de' + a for a in 'xyz']
    dhs = [ctype + ' *inv_dh' + a for a in 'xyz']
-    #
+    '''
-    # Convert p to initial E (ie, apply right preconditioner and PEC)
+    Convert p to initial E (ie, apply right preconditioner and PEC)
-    #
+    '''
    p2e_source = jinja_env.get_template('p2e.cl').render(pec=pec)
    P2E_kernel = ElementwiseKernel(
        context,
@ -141,9 +131,9 @@ def create_a(
        arguments=', '.join(ptrs('E', 'p', 'Pr') + pec_arg),
        )
-    #
+    '''
-    # Calculate intermediate H from intermediate E
+    Calculate intermediate H from intermediate E
-    #
+    '''
    e2h_source = jinja_env.get_template('e2h.cl').render(
        mu=mu,
        pmc=pmc,
@ -157,9 +147,9 @@ def create_a(
        arguments=', '.join(ptrs('E', 'H', 'inv_mu') + pmc_arg + des),
        )
-    #
+    '''
-    # Calculate final E (including left preconditioner)
+    Calculate final E (including left preconditioner)
-    #
+    '''
    h2e_source = jinja_env.get_template('h2e.cl').render(
        pec=pec,
        common_cl=common_source,
@ -178,13 +168,13 @@ def create_a(
            p: pyopencl.array.Array,
            idxes: Sequence[Sequence[pyopencl.array.Array]],
            oeps: pyopencl.array.Array,
-            inv_mu: pyopencl.array.Array | None,
+            inv_mu: Optional[pyopencl.array.Array],
-            pec: pyopencl.array.Array | None,
+            pec: Optional[pyopencl.array.Array],
-            pmc: pyopencl.array.Array | None,
+            pmc: Optional[pyopencl.array.Array],
            Pl: pyopencl.array.Array,
            Pr: pyopencl.array.Array,
-            e: list[pyopencl.Event],
+            e: List[pyopencl.Event],
-            ) -> list[pyopencl.Event]:
+            ) -> List[pyopencl.Event]:
        e2 = P2E_kernel(E, p, Pr, pec, wait_for=e)
        e2 = E2H_kernel(E, H, inv_mu, pmc, *idxes[0], wait_for=[e2])
        e2 = H2E_kernel(E, H, oeps, Pl, pec, *idxes[1], wait_for=[e2])
@ -236,14 +226,14 @@ def create_xr_step(context: pyopencl.Context) -> operation:
            r: pyopencl.array.Array,
            v: pyopencl.array.Array,
            alpha: complex,
-            e: list[pyopencl.Event],
+            e: List[pyopencl.Event],
-            ) -> list[pyopencl.Event]:
+            ) -> List[pyopencl.Event]:
        return [xr_kernel(x, p, r, v, alpha, wait_for=e)]
    return xr_update
-def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., tuple[complex, complex]]:
+def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., Tuple[complex, complex]]:
    """
    Return a function
     ri_update(r, e)
@ -281,9 +271,9 @@ def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., tuple[complex
        arguments=ctype + ' *r',
        )
-    def ri_update(r: pyopencl.array.Array, e: list[pyopencl.Event]) -> tuple[complex, complex]:
+    def ri_update(r: pyopencl.array.Array, e: List[pyopencl.Event]) -> Tuple[complex, complex]:
        g = ri_kernel(r, wait_for=e).astype(ri_dtype).get()
-        rr, ri, ii = (g[qq] for qq in 'xyz')
+        rr, ri, ii = [g[q] for q in 'xyz']
        rho = rr + 2j * ri - ii
        err = rr + ii
        return rho, err
@ -324,7 +314,7 @@ def create_p_step(context: pyopencl.Context) -> operation:
            p: pyopencl.array.Array,
            r: pyopencl.array.Array,
            beta: complex,
-            e: list[pyopencl.Event]) -> list[pyopencl.Event]:
+            e: List[pyopencl.Event]) -> List[pyopencl.Event]:
        return [p_kernel(p, r, beta, wait_for=e)]
    return p_update
@ -359,7 +349,7 @@ def create_dot(context: pyopencl.Context) -> Callable[..., complex]:
    def dot(
            p: pyopencl.array.Array,
            v: pyopencl.array.Array,
-            e: list[pyopencl.Event],
+            e: List[pyopencl.Event],
            ) -> complex:
        g = dot_kernel(p, v, wait_for=e)
        return g.get()
@ -415,11 +405,11 @@ def create_a_csr(context: pyopencl.Context) -> operation:
        )
    def spmv(
-            v_out: pyopencl.array.Array,
+            v_out,
-            m: CSRMatrix,
+            m,
-            v_in: pyopencl.array.Array,
+            v_in,
-            e: list[pyopencl.Event],
+            e: List[pyopencl.Event],
-            ) -> list[pyopencl.Event]:
+            ) -> List[pyopencl.Event]:
        return [spmv_kernel(v_out, m.row_ptr, m.col_ind, m.data, v_in, wait_for=e)]
    return spmv
--- a/pyproject.toml
+++ b/pyproject.toml
@ -35,10 +35,10 @@ classifiers = [
    "License :: OSI Approved :: GNU Affero General Public License v3",
    "Topic :: Scientific/Engineering",
    ]
-requires-python = ">=3.11"
+requires-python = ">=3.8"
 dynamic = ["version"]
 dependencies = [
-    "numpy>=1.26",
+    "numpy~=1.21",
    "pyopencl",
    "jinja2",
    "meanas>=0.5",
@ -46,51 +46,3 @@ dependencies = [
 [tool.hatch.version]
 path = "opencl_fdfd/__init__.py"
 [tool.ruff]
 exclude = [
    ".git",
    "dist",
    ]
 line-length = 145
 indent-width = 4
 lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
 lint.select = [
    "NPY", "E", "F", "W", "B", "ANN", "UP", "SLOT", "SIM", "LOG",
    "C4", "ISC", "PIE", "PT", "RET", "TCH", "PTH", "INT",
    "ARG", "PL", "R", "TRY",
    "G010", "G101", "G201", "G202",
    "Q002", "Q003", "Q004",
    ]
 lint.ignore = [
    #"ANN001",   # No annotation
    "ANN002",   # *args
    "ANN003",   # **kwargs
    "ANN401",   # Any
    "ANN101",   # self: Self
    "SIM108",   # single-line if / else assignment
    "RET504",   # x=y+z; return x
    "PIE790",   # unnecessary pass
    "ISC003",   # non-implicit string concatenation
    "C408",     # dict(x=y) instead of {'x': y}
    "PLR09",    # Too many xxx
    "PLR2004",  # magic number
    "PLC0414",  # import x as x
    "TRY003",   # Long exception message
    ]
 [[tool.mypy.overrides]]
 module = [
    "scipy",
    "scipy.optimize",
    "scipy.linalg",
    "scipy.sparse",
    "scipy.sparse.linalg",
    "pyopencl",
    "pyopencl.array",
    "pyopencl.elementwise",
    "pyopencl.reduction",
    ]
 ignore_missing_imports = true