5 changed files with 104 additions and 159 deletions
--- a/opencl_fdfd/init.py
+++ b/opencl_fdfd/init.py
@ -37,7 +37,7 @@
    - jinja2
 """

-from .main import cg_solver as cg_solver
+from .main import cg_solver

 __author__ = 'Jan Petykiewicz'
 __version__ = '0.4'
--- a/opencl_fdfd/csr.py
+++ b/opencl_fdfd/csr.py
@ -14,23 +14,23 @@ satisfy the constraints for the 'conjugate gradient' algorithm
 (positive definite, symmetric) and some that don't.
 """

-from typing import Any, TYPE_CHECKING
+from typing import Dict, Any, Optional
 import time
 import logging

 import numpy
 from numpy.typing import NDArray, ArrayLike
 from numpy.linalg import norm
-from numpy import complexfloating
 import pyopencl
 import pyopencl.array
+import scipy
+
 import meanas.fdfd.solvers

 from . import ops

-if TYPE_CHECKING:
-    import scipy

+__author__ = 'Jan Petykiewicz'

 logger = logging.getLogger(__name__)

@ -58,9 +58,9 @@ def cg(
        b: ArrayLike,
        max_iters: int = 10000,
        err_threshold: float = 1e-6,
-        context: pyopencl.Context | None = None,
-        queue: pyopencl.CommandQueue | None = None,
-        ) -> NDArray[complexfloating]:
+        context: Optional[pyopencl.Context] = None,
+        queue: Optional[pyopencl.CommandQueue] = None,
+        ) -> NDArray:
    """
    General conjugate-gradient solver for sparse matrices, where A @ x = b.

@ -84,10 +84,10 @@ def cg(
    if queue is None:
        queue = pyopencl.CommandQueue(context)

-    def load_field(v: NDArray[numpy.complexfloating], dtype: type = numpy.complex128) -> pyopencl.array.Array:
+    def load_field(v, dtype=numpy.complex128):
        return pyopencl.array.to_device(queue, v.astype(dtype))

-    r = load_field(numpy.asarray(b))
+    r = load_field(b)
    x = pyopencl.array.zeros_like(r)
    v = pyopencl.array.zeros_like(r)
    p = pyopencl.array.zeros_like(r)
@ -98,18 +98,18 @@ def cg(

    m = CSRMatrix(queue, A)

-    #
-    # Generate OpenCL kernels
-    #
+    '''
+    Generate OpenCL kernels
+    '''
    a_step = ops.create_a_csr(context)
    xr_step = ops.create_xr_step(context)
    rhoerr_step = ops.create_rhoerr_step(context)
    p_step = ops.create_p_step(context)
    dot = ops.create_dot(context)

-    #
-    # Start the solve
-    #
+    '''
+    Start the solve
+    '''
    start_time2 = time.perf_counter()

    _, err2 = rhoerr_step(r, [])
@ -139,9 +139,9 @@ def cg(
        if k % 1000 == 0:
            logger.info(f'iteration {k}')

-    #
-    # Done solving
-    #
+    '''
+    Done solving
+    '''
    time_elapsed = time.perf_counter() - start_time

    x = x.get()
@ -160,9 +160,9 @@ def cg(


 def fdfd_cg_solver(
-        solver_opts: dict[str, Any] | None = None,
-        **fdfd_args,
-        ) -> NDArray[complexfloating]:
+        solver_opts: Optional[Dict[str, Any]] = None,
+        **fdfd_args
+        ) -> NDArray:
    """
    Conjugate gradient FDFD solver using CSR sparse matrices, mainly for
     testing and development since it's much slower than the solver in main.py.
--- a/opencl_fdfd/main.py
+++ b/opencl_fdfd/main.py
@ -5,13 +5,14 @@ This file holds the default FDFD solver, which uses an E-field wave
 operator implemented directly as OpenCL arithmetic (rather than as
 a matrix).
 """
+
+from typing import List, Optional, cast
 import time
 import logging

 import numpy
 from numpy.typing import NDArray, ArrayLike
 from numpy.linalg import norm
-from numpy import floating, complexfloating
 import pyopencl
 import pyopencl.array

@ -20,21 +21,23 @@ import meanas.fdfd.operators
 from . import ops


+__author__ = 'Jan Petykiewicz'
+
 logger = logging.getLogger(__name__)


 def cg_solver(
        omega: complex,
-        dxes: list[list[NDArray[floating | complexfloating]]],
+        dxes: List[List[NDArray]],
        J: ArrayLike,
        epsilon: ArrayLike,
-        mu: ArrayLike | None = None,
-        pec: ArrayLike | None = None,
-        pmc: ArrayLike | None = None,
+        mu: Optional[ArrayLike] = None,
+        pec: Optional[ArrayLike] = None,
+        pmc: Optional[ArrayLike] = None,
        adjoint: bool = False,
        max_iters: int = 40000,
        err_threshold: float = 1e-6,
-        context: pyopencl.Context | None = None,
+        context: Optional[pyopencl.Context] = None,
        ) -> NDArray:
    """
    OpenCL FDFD solver using the iterative conjugate gradient (cg) method
@ -68,7 +71,7 @@ def cg_solver(

    shape = [dd.size for dd in dxes[0]]

-    b = -1j * omega * numpy.asarray(J)
+    b = -1j * omega * numpy.array(J, copy=False)

    '''
        ** In this comment, I use the following notation:
@ -97,8 +100,7 @@ def cg_solver(
        We can accomplish all this simply by conjugating everything (except J) and
         reversing the order of L and R
    '''
-    epsilon = numpy.asarray(epsilon)
-
+    epsilon = numpy.array(epsilon, copy=False)
    if adjoint:
        # Conjugate everything
        dxes = [[numpy.conj(dd) for dd in dds] for dds in dxes]
@ -106,20 +108,23 @@ def cg_solver(
        epsilon = numpy.conj(epsilon)
        if mu is not None:
            mu = numpy.conj(mu)
-    assert isinstance(epsilon, NDArray[floating] | NDArray[complexfloating])

    L, R = meanas.fdfd.operators.e_full_preconditioners(dxes)
-    b_preconditioned = (R if adjoint else L) @ b

-    #
-    # Allocate GPU memory and load in data
-    #
+    if adjoint:
+        b_preconditioned = R @ b
+    else:
+        b_preconditioned = L @ b
+
+    '''
+        Allocate GPU memory and load in data
+    '''
    if context is None:
        context = pyopencl.create_some_context(interactive=True)

    queue = pyopencl.CommandQueue(context)

-    def load_field(v: NDArray[complexfloating | floating], dtype: type = numpy.complex128) -> pyopencl.array.Array:
+    def load_field(v, dtype=numpy.complex128):
        return pyopencl.array.to_device(queue, v.astype(dtype))

    r = load_field(b_preconditioned)  # load preconditioned b into r
@ -132,31 +137,31 @@ def cg_solver(
    rho = 1.0 + 0j
    errs = []

-    inv_dxes = [[load_field(1 / numpy.asarray(dd)) for dd in dds] for dds in dxes]
-    oeps = load_field(-omega * omega * epsilon)
+    inv_dxes = [[load_field(1 / numpy.array(dd, copy=False)) for dd in dds] for dds in dxes]
+    oeps = load_field(-omega ** 2 * epsilon)
    Pl = load_field(L.diagonal())
    Pr = load_field(R.diagonal())

    if mu is None:
        invm = load_field(numpy.array([]))
    else:
-        invm = load_field(1 / numpy.asarray(mu))
-        mu = numpy.asarray(mu)
+        invm = load_field(1 / numpy.array(mu, copy=False))
+        mu = numpy.array(mu, copy=False)

    if pec is None:
        gpec = load_field(numpy.array([]), dtype=numpy.int8)
    else:
-        gpec = load_field(numpy.asarray(pec, dtype=bool), dtype=numpy.int8)
+        gpec = load_field(numpy.array(pec, dtype=bool, copy=False), dtype=numpy.int8)

    if pmc is None:
        gpmc = load_field(numpy.array([]), dtype=numpy.int8)
    else:
-        gpmc = load_field(numpy.asarray(pmc, dtype=bool), dtype=numpy.int8)
+        gpmc = load_field(numpy.array(pmc, dtype=bool, copy=False), dtype=numpy.int8)

-    #
-    # Generate OpenCL kernels
-    #
-    has_mu, has_pec, has_pmc = (qq is not None for qq in (mu, pec, pmc))
+    '''
+    Generate OpenCL kernels
+    '''
+    has_mu, has_pec, has_pmc = [q is not None for q in (mu, pec, pmc)]

    a_step_full = ops.create_a(context, shape, has_mu, has_pec, has_pmc)
    xr_step = ops.create_xr_step(context)
@ -164,17 +169,12 @@ def cg_solver(
    p_step = ops.create_p_step(context)
    dot = ops.create_dot(context)

-    def a_step(
-            E: pyopencl.array.Array,
-            H: pyopencl.array.Array,
-            p: pyopencl.array.Array,
-            events: list[pyopencl.Event],
-            ) -> list[pyopencl.Event]:
+    def a_step(E, H, p, events):
        return a_step_full(E, H, p, inv_dxes, oeps, invm, gpec, gpmc, Pl, Pr, events)

-    #
-    # Start the solve
-    #
+    '''
+    Start the solve
+    '''
    start_time2 = time.perf_counter()

    _, err2 = rhoerr_step(r, [])
@ -207,13 +207,16 @@ def cg_solver(
        if k % 1000 == 0:
            logger.info(f'iteration {k}')

-    #
-    # Done solving
-    #
+    '''
+    Done solving
+    '''
    time_elapsed = time.perf_counter() - start_time

    # Undo preconditioners
-    x = ((Pl if adjoint else Pr) * x).get()
+    if adjoint:
+        x = (Pl * x).get()
+    else:
+        x = (Pr * x).get()

    if success:
        logger.info('Solve success')
--- a/opencl_fdfd/ops.py
+++ b/opencl_fdfd/ops.py
@ -7,11 +7,11 @@ kernels for use by the other solvers.
 See kernels/ for any of the .cl files loaded in this file.
 """

-from collections.abc import Callable, Sequence
+from typing import List, Callable, Union, Type, Sequence, Optional, Tuple
 import logging

 import numpy
-from numpy.typing import ArrayLike
+from numpy.typing import NDArray, ArrayLike
 import jinja2

 import pyopencl
@ -20,25 +20,17 @@ from pyopencl.elementwise import ElementwiseKernel
 from pyopencl.reduction import ReductionKernel


-from .csr import CSRMatrix
-
-
 logger = logging.getLogger(__name__)

-
-class FDFDError(Exception):
-    """ Custom error for opencl_fdfd """
-    pass
-
 # Create jinja2 env on module load
 jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))

 # Return type for the create_opname(...) functions
-operation = Callable[..., list[pyopencl.Event]]
+operation = Callable[..., List[pyopencl.Event]]


 def type_to_C(
-        float_type: type[numpy.floating | numpy.complexfloating],
+        float_type: Type,
        ) -> str:
    """
    Returns a string corresponding to the C equivalent of a numpy type.
@ -50,37 +42,35 @@ def type_to_C(
        string containing the corresponding C type (eg. 'double')
    """
    types = {
-        numpy.float16: 'half',
        numpy.float32: 'float',
        numpy.float64: 'double',
        numpy.complex64: 'cfloat_t',
        numpy.complex128: 'cdouble_t',
    }
    if float_type not in types:
-        raise FDFDError('Unsupported type')
+        raise Exception('Unsupported type')

    return types[float_type]

-
 # Type names
 ctype = type_to_C(numpy.complex128)
 ctype_bare = 'cdouble'

 # Preamble for all OpenCL code
-preamble = f'''
+preamble = '''
 #define PYOPENCL_DEFINE_CDOUBLE
 #include <pyopencl-complex.h>

 //Defines to clean up operation and type names
-#define ctype {ctype_bare}_t
-#define zero {ctype_bare}_new(0.0, 0.0)
-#define add {ctype_bare}_add
-#define sub {ctype_bare}_sub
-#define mul {ctype_bare}_mul
-'''
+#define ctype {ctype}_t
+#define zero {ctype}_new(0.0, 0.0)
+#define add {ctype}_add
+#define sub {ctype}_sub
+#define mul {ctype}_mul
+'''.format(ctype=ctype_bare)


-def ptrs(*args: str) -> list[str]:
+def ptrs(*args: str) -> List[str]:
    return [ctype + ' *' + s for s in args]


@ -129,9 +119,9 @@ def create_a(
    des = [ctype + ' *inv_de' + a for a in 'xyz']
    dhs = [ctype + ' *inv_dh' + a for a in 'xyz']

-    #
-    # Convert p to initial E (ie, apply right preconditioner and PEC)
-    #
+    '''
+    Convert p to initial E (ie, apply right preconditioner and PEC)
+    '''
    p2e_source = jinja_env.get_template('p2e.cl').render(pec=pec)
    P2E_kernel = ElementwiseKernel(
        context,
@ -141,9 +131,9 @@ def create_a(
        arguments=', '.join(ptrs('E', 'p', 'Pr') + pec_arg),
        )

-    #
-    # Calculate intermediate H from intermediate E
-    #
+    '''
+    Calculate intermediate H from intermediate E
+    '''
    e2h_source = jinja_env.get_template('e2h.cl').render(
        mu=mu,
        pmc=pmc,
@ -157,9 +147,9 @@ def create_a(
        arguments=', '.join(ptrs('E', 'H', 'inv_mu') + pmc_arg + des),
        )

-    #
-    # Calculate final E (including left preconditioner)
-    #
+    '''
+    Calculate final E (including left preconditioner)
+    '''
    h2e_source = jinja_env.get_template('h2e.cl').render(
        pec=pec,
        common_cl=common_source,
@ -178,13 +168,13 @@ def create_a(
            p: pyopencl.array.Array,
            idxes: Sequence[Sequence[pyopencl.array.Array]],
            oeps: pyopencl.array.Array,
-            inv_mu: pyopencl.array.Array | None,
-            pec: pyopencl.array.Array | None,
-            pmc: pyopencl.array.Array | None,
+            inv_mu: Optional[pyopencl.array.Array],
+            pec: Optional[pyopencl.array.Array],
+            pmc: Optional[pyopencl.array.Array],
            Pl: pyopencl.array.Array,
            Pr: pyopencl.array.Array,
-            e: list[pyopencl.Event],
-            ) -> list[pyopencl.Event]:
+            e: List[pyopencl.Event],
+            ) -> List[pyopencl.Event]:
        e2 = P2E_kernel(E, p, Pr, pec, wait_for=e)
        e2 = E2H_kernel(E, H, inv_mu, pmc, *idxes[0], wait_for=[e2])
        e2 = H2E_kernel(E, H, oeps, Pl, pec, *idxes[1], wait_for=[e2])
@ -236,14 +226,14 @@ def create_xr_step(context: pyopencl.Context) -> operation:
            r: pyopencl.array.Array,
            v: pyopencl.array.Array,
            alpha: complex,
-            e: list[pyopencl.Event],
-            ) -> list[pyopencl.Event]:
+            e: List[pyopencl.Event],
+            ) -> List[pyopencl.Event]:
        return [xr_kernel(x, p, r, v, alpha, wait_for=e)]

    return xr_update


-def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., tuple[complex, complex]]:
+def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., Tuple[complex, complex]]:
    """
    Return a function
     ri_update(r, e)
@ -281,9 +271,9 @@ def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., tuple[complex
        arguments=ctype + ' *r',
        )

-    def ri_update(r: pyopencl.array.Array, e: list[pyopencl.Event]) -> tuple[complex, complex]:
+    def ri_update(r: pyopencl.array.Array, e: List[pyopencl.Event]) -> Tuple[complex, complex]:
        g = ri_kernel(r, wait_for=e).astype(ri_dtype).get()
-        rr, ri, ii = (g[qq] for qq in 'xyz')
+        rr, ri, ii = [g[q] for q in 'xyz']
        rho = rr + 2j * ri - ii
        err = rr + ii
        return rho, err
@ -324,7 +314,7 @@ def create_p_step(context: pyopencl.Context) -> operation:
            p: pyopencl.array.Array,
            r: pyopencl.array.Array,
            beta: complex,
-            e: list[pyopencl.Event]) -> list[pyopencl.Event]:
+            e: List[pyopencl.Event]) -> List[pyopencl.Event]:
        return [p_kernel(p, r, beta, wait_for=e)]

    return p_update
@ -359,7 +349,7 @@ def create_dot(context: pyopencl.Context) -> Callable[..., complex]:
    def dot(
            p: pyopencl.array.Array,
            v: pyopencl.array.Array,
-            e: list[pyopencl.Event],
+            e: List[pyopencl.Event],
            ) -> complex:
        g = dot_kernel(p, v, wait_for=e)
        return g.get()
@ -415,11 +405,11 @@ def create_a_csr(context: pyopencl.Context) -> operation:
        )

    def spmv(
-            v_out: pyopencl.array.Array,
-            m: CSRMatrix,
-            v_in: pyopencl.array.Array,
-            e: list[pyopencl.Event],
-            ) -> list[pyopencl.Event]:
+            v_out,
+            m,
+            v_in,
+            e: List[pyopencl.Event],
+            ) -> List[pyopencl.Event]:
        return [spmv_kernel(v_out, m.row_ptr, m.col_ind, m.data, v_in, wait_for=e)]

    return spmv
--- a/pyproject.toml
+++ b/pyproject.toml
@ -35,10 +35,10 @@ classifiers = [
    "License :: OSI Approved :: GNU Affero General Public License v3",
    "Topic :: Scientific/Engineering",
    ]
-requires-python = ">=3.11"
+requires-python = ">=3.8"
 dynamic = ["version"]
 dependencies = [
-    "numpy>=1.26",
+    "numpy~=1.21",
    "pyopencl",
    "jinja2",
    "meanas>=0.5",
@ -46,51 +46,3 @@ dependencies = [

 [tool.hatch.version]
 path = "opencl_fdfd/__init__.py"
-
-
-[tool.ruff]
-exclude = [
-    ".git",
-    "dist",
-    ]
-line-length = 145
-indent-width = 4
-lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
-lint.select = [
-    "NPY", "E", "F", "W", "B", "ANN", "UP", "SLOT", "SIM", "LOG",
-    "C4", "ISC", "PIE", "PT", "RET", "TCH", "PTH", "INT",
-    "ARG", "PL", "R", "TRY",
-    "G010", "G101", "G201", "G202",
-    "Q002", "Q003", "Q004",
-    ]
-lint.ignore = [
-    #"ANN001",   # No annotation
-    "ANN002",   # *args
-    "ANN003",   # **kwargs
-    "ANN401",   # Any
-    "ANN101",   # self: Self
-    "SIM108",   # single-line if / else assignment
-    "RET504",   # x=y+z; return x
-    "PIE790",   # unnecessary pass
-    "ISC003",   # non-implicit string concatenation
-    "C408",     # dict(x=y) instead of {'x': y}
-    "PLR09",    # Too many xxx
-    "PLR2004",  # magic number
-    "PLC0414",  # import x as x
-    "TRY003",   # Long exception message
-    ]
-
-
-[[tool.mypy.overrides]]
-module = [
-    "scipy",
-    "scipy.optimize",
-    "scipy.linalg",
-    "scipy.sparse",
-    "scipy.sparse.linalg",
-    "pyopencl",
-    "pyopencl.array",
-    "pyopencl.elementwise",
-    "pyopencl.reduction",
-    ]
-ignore_missing_imports = true