add half-precision to type_to_C

bump version to 0.4
use f-strings everywhere
12 changed files with 387 additions and 242 deletions
--- a/.gitignore
+++ b/.gitignore
@ -60,3 +60,6 @@ target/
 # PyCharm
 .idea/
 .mypy_cache/
 .pytest_cache/
--- a/README.md
+++ b/README.md
@ -6,10 +6,10 @@ electromagnetic solver implemented in Python and OpenCL.
 **Capabilities:**
 * Arbitrary distributions of the following:
-    * Dielectric constant (```epsilon```)
+    * Dielectric constant (`epsilon`)
-    * Magnetic permeabilty (```mu```)
+    * Magnetic permeabilty (`mu`)
-    * Perfect electric conductor (```PEC```)
+    * Perfect electric conductor (`PEC`)
-    * Perfect magnetic conductor (```PMC```)
+    * Perfect magnetic conductor (`PMC`)
 * Variable-sized rectangular grids
    * Stretched-coordinate PMLs (complex cell sizes allowed)
@ -17,10 +17,10 @@ Currently, only periodic boundary conditions are included.
 PEC/PMC boundaries can be implemented by drawing PEC/PMC cells near the edges.
 Bloch boundary conditions are not included but wouldn't be very hard to add.
-The default solver ```opencl_fdfd.cg_solver(...)``` located in main.py
+The default solver `opencl_fdfd.cg_solver(...)` located in main.py
 implements the E-field wave operator directly (ie, as a list of OpenCL
 instructions rather than a matrix). Additionally, there is a slower
-(and slightly more versatile) solver in ```csr.py``` which attempts to solve
+(and slightly more versatile) solver in `csr.py` which attempts to solve
 an arbitrary sparse matrix in compressed sparse row (CSR) format using
 the same conjugate gradient method as the default solver. The CSR solver
 is significantly slower, but can be very useful for testing alternative
@ -34,29 +34,29 @@ generalization to multiple GPUs should be pretty straightforward
 ## Installation
 **Dependencies:**
-* python 3 (written and tested with 3.5) 
+* python 3 (written and tested with 3.7)
 * numpy
 * pyopencl
 * jinja2
-* [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools) (>=0.2)
+* [meanas](https://mpxd.net/code/jan/meanas) (>=0.5)
 Install with pip, via git:
 ```bash
-pip install git+https://mpxd.net/gogs/jan/opencl_fdfd.git@release
+pip install git+https://mpxd.net/code/jan/opencl_fdfd.git@release
 ```
 ## Use
-See the documentation for ```opencl_fdfd.cg_solver(...)```
+See the documentation for `opencl_fdfd.cg_solver(...)`
 (located in ```main.py```) for details about how to call the solver.
 The FDFD arguments are identical to those in
-```fdfd_tools.solvers.generic(...)```, and a few solver-specific
+`meanas.solvers.generic(...)`, and a few solver-specific
 arguments are available.
- 
+
 An alternate (slower) FDFD solver and a general gpu-based sparse matrix
-solver is available in ```csr.py```. These aren't particularly
+solver is available in `csr.py`. These aren't particularly
 well-optimized, and something like
 [MAGMA](http://icl.cs.utk.edu/magma/index.html) would probably be a
 better choice if you absolutely need to solve arbitrary sparse matrices
--- a/opencl_fdfd/LICENSE.md
+++ b/opencl_fdfd/LICENSE.md
@ -0,0 +1 @@
 ../LICENSE.md
--- a/opencl_fdfd/README.md
+++ b/opencl_fdfd/README.md
@ -0,0 +1 @@
 ../README.md
--- a/opencl_fdfd/init.py
+++ b/opencl_fdfd/init.py
@ -31,7 +31,7 @@
  Dependencies:
-    - fdfd_tools    ( https://mpxd.net/gogs/jan/fdfd_tools )
+    - meanas    ( https://mpxd.net/code/jan/meanas )
    - numpy
    - pyopencl
    - jinja2
@ -40,4 +40,5 @@
 from .main import cg_solver
 __author__ = 'Jan Petykiewicz'
-
+__version__ = '0.4'
 version = __version__
--- a/opencl_fdfd/csr.py
+++ b/opencl_fdfd/csr.py
@ -6,7 +6,7 @@ CSRMatrix sparse matrix representation.
 The FDFD solver (fdfd_cg_solver()) solves an FDFD problem by
 creating a sparse matrix representing the problem (using
-fdfd_tools) and then passing it to cg(), which performs a
+meanas) and then passing it to cg(), which performs a
 conjugate gradient solve.
 cg() is capable of solving arbitrary sparse matrices which
@ -14,54 +14,66 @@ satisfy the constraints for the 'conjugate gradient' algorithm
 (positive definite, symmetric) and some that don't.
 """
-from typing import Dict, Any
+from typing import Dict, Any, Optional
 import time
 import logging
 import numpy
 from numpy.typing import NDArray, ArrayLike
 from numpy.linalg import norm
 import pyopencl
 import pyopencl.array
 import scipy
-import fdfd_tools.solvers
+import meanas.fdfd.solvers
 from . import ops
-class CSRMatrix(object):
+__author__ = 'Jan Petykiewicz'
 logger = logging.getLogger(__name__)
 class CSRMatrix:
    """
    Matrix stored in Compressed Sparse Row format, in GPU RAM.
    """
-    row_ptr = None      # type: pyopencl.array.Array
+    row_ptr: pyopencl.array.Array
-    col_ind = None      # type: pyopencl.array.Array
+    col_ind: pyopencl.array.Array
-    data = None         # type: pyopencl.array.Array
+    data: pyopencl.array.Array
-    def __init__(self,
+    def __init__(
-                 queue: pyopencl.CommandQueue,
+            self,
-                 m: 'scipy.sparse.csr_matrix'):
+            queue: pyopencl.CommandQueue,
            m: 'scipy.sparse.csr_matrix',
            ) -> None:
        self.row_ptr = pyopencl.array.to_device(queue, m.indptr)
        self.col_ind = pyopencl.array.to_device(queue, m.indices)
        self.data = pyopencl.array.to_device(queue, m.data.astype(numpy.complex128))
-def cg(A: 'scipy.sparse.csr_matrix',
+def cg(
-       b: numpy.ndarray,
+        A: 'scipy.sparse.csr_matrix',
-       max_iters: int = 10000,
+        b: ArrayLike,
-       err_threshold: float = 1e-6,
+        max_iters: int = 10000,
-       context: pyopencl.Context = None,
+        err_threshold: float = 1e-6,
-       queue: pyopencl.CommandQueue = None,
+        context: Optional[pyopencl.Context] = None,
-       verbose: bool = False,
+        queue: Optional[pyopencl.CommandQueue] = None,
-       ) -> numpy.ndarray:
+        ) -> NDArray:
    """
    General conjugate-gradient solver for sparse matrices, where A @ x = b.
-    :param A: Matrix to solve (CSR format)
+    Args:
-    :param b: Right-hand side vector (dense ndarray)
+        A: Matrix to solve (CSR format)
-    :param max_iters: Maximum number of iterations
+        b: Right-hand side vector (dense ndarray)
-    :param err_threshold: Error threshold for successful solve, relative to norm(b)
+        max_iters: Maximum number of iterations
-    :param context: PyOpenCL context. Will be created if not given.
+        err_threshold: Error threshold for successful solve, relative to norm(b)
-    :param queue: PyOpenCL command queue. Will be created if not given.
+        context: PyOpenCL context. Will be created if not given.
-    :param verbose: Whether to print statistics to screen.
+        queue: PyOpenCL command queue. Will be created if not given.
-    :return: Solution vector x; returned even if solve doesn't converge.
+
    Returns:
        Solution vector x; returned even if solve doesn't converge.
    """
    start_time = time.perf_counter()
@ -102,13 +114,11 @@ def cg(A: 'scipy.sparse.csr_matrix',
    _, err2 = rhoerr_step(r, [])
    b_norm = numpy.sqrt(err2)
-    if verbose:
+    logging.debug(f'b_norm check: {b_norm}')
        print('b_norm check: ', b_norm)
    success = False
    for k in range(max_iters):
-        if verbose:
+        logging.debug(f'[{k:06d}] rho {rho:.4} alpha {alpha:4.4}')
            print('[{:06d}] rho {:.4} alpha {:4.4}'.format(k, rho, alpha), end=' ')
        rho_prev = rho
        e = xr_step(x, p, r, v, alpha, [])
@ -116,8 +126,7 @@ def cg(A: 'scipy.sparse.csr_matrix',
        errs += [numpy.sqrt(err2) / b_norm]
-        if verbose:
+        logging.debug(f'err {errs[-1]}')
            print('err', errs[-1])
        if errs[-1] < err_threshold:
            success = True
@ -127,8 +136,8 @@ def cg(A: 'scipy.sparse.csr_matrix',
        e = a_step(v, m, p, e)
        alpha = rho / dot(p, v, e)
-        if verbose and k % 1000 == 0:
+        if k % 1000 == 0:
-            print(k)
+            logger.info(f'iteration {k}')
    '''
    Done solving
@ -137,43 +146,50 @@ def cg(A: 'scipy.sparse.csr_matrix',
    x = x.get()
-    if verbose:
+    if success:
-        if success:
+        logging.info('Solve success')
-            print('Success', end='')
+    else:
-        else:
+        logging.warning('Solve failure')
-            print('Failure', end=', ')
+    logging.info(f'{k} iterations in {time_elapsed} sec: {k / time_elapsed} iterations/sec')
-        print(', {} iterations in {} sec: {} iterations/sec \
+    logging.debug(f'final error {errs[-1]}')
-                      '.format(k, time_elapsed, k / time_elapsed))
+    logging.debug(f'overhead {start_time2 - start_time} sec')
        print('final error', errs[-1])
        print('overhead {} sec'.format(start_time2 - start_time))
-        print('Final residual:', norm(A @ x - b) / norm(b))
+    residual = norm(A @ x - b) / norm(b)
    logging.info(f'Final residual: {residual}')
    return x
-def fdfd_cg_solver(solver_opts: Dict[str, Any] = None,
+def fdfd_cg_solver(
-                   **fdfd_args
+        solver_opts: Optional[Dict[str, Any]] = None,
-                   ) -> numpy.ndarray:
+        **fdfd_args
        ) -> NDArray:
    """
    Conjugate gradient FDFD solver using CSR sparse matrices, mainly for
     testing and development since it's much slower than the solver in main.py.
-    Calls fdfd_tools.solvers.generic(**fdfd_args,
+    Calls meanas.fdfd.solvers.generic(
-                                     matrix_solver=opencl_fdfd.csr.cg,
+        **fdfd_args,
-                                     matrix_solver_opts=solver_opts)
+        matrix_solver=opencl_fdfd.csr.cg,
        matrix_solver_opts=solver_opts,
        )
-    :param solver_opts: Passed as matrix_solver_opts to fdfd_tools.solver.generic(...).
+    Args:
-        Default {}.
+        solver_opts: Passed as matrix_solver_opts to fdfd_tools.solver.generic(...).
-    :param fdfd_args: Passed as **fdfd_args to fdfd_tools.solver.generic(...).
+            Default {}.
-        Should include all of the arguments **except** matrix_solver and matrix_solver_opts
+        fdfd_args: Passed as **fdfd_args to fdfd_tools.solver.generic(...).
-    :return: E-field which solves the system.
+            Should include all of the arguments **except** matrix_solver and matrix_solver_opts
    Returns:
        E-field which solves the system.
    """
    if solver_opts is None:
        solver_opts = dict()
-    x = fdfd_tools.solvers.generic(matrix_solver=cg,
+    x = meanas.fdfd.solvers.generic(
-                                   matrix_solver_opts=solver_opts,
+        matrix_solver=cg,
-                                   **fdfd_args)
+        matrix_solver_opts=solver_opts,
        **fdfd_args,
        )
    return x
--- a/opencl_fdfd/kernels/e2h.cl
+++ b/opencl_fdfd/kernels/e2h.cl
@ -31,7 +31,7 @@ __global char *pmc_z = pmc + ZZ;
 //Update H components; set them to 0 if PMC is enabled at that location.
-//Mu division and PMC conditional are only included if {{mu}} and {{pmc}} are true
+//Mu division and PMC conditional are only included if {mu} and {pmc} are true
 {% if pmc -%}
 if (pmc_x[i] != 0) {
    Hx[i] = zero;
@ -42,9 +42,9 @@ if (pmc_x[i] != 0) {
    ctype Dyz = mul(sub(Ey[i + pz], Ey[i]), inv_dez[z]);
    ctype x_curl = sub(Dzy, Dyz);
-    {%- if mu -%}
+    {%- if mu %}
    Hx[i] = mul(inv_mu_x[i], x_curl);
-    {%- else -%}
+    {%- else %}
    Hx[i] = x_curl;
    {%- endif %}
 }
@ -59,9 +59,9 @@ if (pmc_y[i] != 0) {
    ctype Dzx = mul(sub(Ez[i + px], Ez[i]), inv_dex[x]);
    ctype y_curl = sub(Dxz, Dzx);
-    {%- if mu -%}
+    {%- if mu %}
    Hy[i] = mul(inv_mu_y[i], y_curl);
-    {%- else -%}
+    {%- else %}
    Hy[i] = y_curl;
    {%- endif %}
 }
@ -76,9 +76,9 @@ if (pmc_z[i] != 0) {
    ctype Dxy = mul(sub(Ex[i + py], Ex[i]), inv_dey[y]);
    ctype z_curl = sub(Dyx, Dxy);
-    {%- if mu -%}
+    {%- if mu %}
    Hz[i] = mul(inv_mu_z[i], z_curl);
-    {%- else -%}
+    {%- else %}
    Hz[i] = z_curl;
    {%- endif %}
 }
--- a/opencl_fdfd/main.py
+++ b/opencl_fdfd/main.py
@ -6,66 +6,72 @@ operator implemented directly as OpenCL arithmetic (rather than as
 a matrix).
 """
-from typing import List
+from typing import List, Optional, cast
 import time
 import logging
 import numpy
 from numpy.typing import NDArray, ArrayLike
 from numpy.linalg import norm
 import pyopencl
 import pyopencl.array
-import fdfd_tools.operators
+import meanas.fdfd.operators
 from . import ops
 __author__ = 'Jan Petykiewicz'
 logger = logging.getLogger(__name__)
-def cg_solver(omega: complex,
+
-              dxes: List[List[numpy.ndarray]],
+def cg_solver(
-              J: numpy.ndarray,
+        omega: complex,
-              epsilon: numpy.ndarray,
+        dxes: List[List[NDArray]],
-              mu: numpy.ndarray = None,
+        J: ArrayLike,
-              pec: numpy.ndarray = None,
+        epsilon: ArrayLike,
-              pmc: numpy.ndarray = None,
+        mu: Optional[ArrayLike] = None,
-              adjoint: bool = False,
+        pec: Optional[ArrayLike] = None,
-              max_iters: int = 40000,
+        pmc: Optional[ArrayLike] = None,
-              err_threshold: float = 1e-6,
+        adjoint: bool = False,
-              context: pyopencl.Context = None,
+        max_iters: int = 40000,
-              verbose: bool = False,
+        err_threshold: float = 1e-6,
-              ) -> numpy.ndarray:
+        context: Optional[pyopencl.Context] = None,
        ) -> NDArray:
    """
    OpenCL FDFD solver using the iterative conjugate gradient (cg) method
     and implementing the diagonalized E-field wave operator directly in
     OpenCL.
    All ndarray arguments should be 1D arrays. To linearize a list of 3 3D ndarrays,
-     either use fdfd_tools.vec() or numpy:
+     either use meanas.fdmath.vec() or numpy:
     f_1D = numpy.hstack(tuple((fi.flatten(order='F') for fi in [f_x, f_y, f_z])))
-    :param omega: Complex frequency to solve at.
+    Args:
-    :param dxes: [[dx_e, dy_e, dz_e], [dx_h, dy_h, dz_h]] (complex cell sizes)
+        omega: Complex frequency to solve at.
-    :param J: Electric current distribution (at E-field locations)
+        dxes: [[dx_e, dy_e, dz_e], [dx_h, dy_h, dz_h]] (complex cell sizes)
-    :param epsilon: Dielectric constant distribution (at E-field locations)
+        J: Electric current distribution (at E-field locations)
-    :param mu: Magnetic permeability distribution (at H-field locations)
+        epsilon: Dielectric constant distribution (at E-field locations)
-    :param pec: Perfect electric conductor distribution
+        mu: Magnetic permeability distribution (at H-field locations)
-        (at E-field locations; non-zero value indicates PEC is present)
+        pec: Perfect electric conductor distribution
-    :param pmc: Perfect magnetic conductor distribution
+            (at E-field locations; non-zero value indicates PEC is present)
-        (at H-field locations; non-zero value indicates PMC is present)
+        pmc: Perfect magnetic conductor distribution
-    :param adjoint: If true, solves the adjoint problem.
+            (at H-field locations; non-zero value indicates PMC is present)
-    :param max_iters: Maximum number of iterations. Default 40,000.
+        adjoint: If true, solves the adjoint problem.
-    :param err_threshold: If (r @ r.conj()) / norm(1j * omega * J) < err_threshold, success.
+        max_iters: Maximum number of iterations. Default 40,000.
-        Default 1e-6.
+        err_threshold: If (r @ r.conj()) / norm(1j * omega * J) < err_threshold, success.
-    :param context: PyOpenCL context to run in. If not given, construct a new context.
+            Default 1e-6.
-    :param verbose: If True, print progress to stdout. Default False.
+        context: PyOpenCL context to run in. If not given, construct a new context.
    :return: E-field which solves the system. Returned even if we did not converge.
    """
    Returns:
        E-field which solves the system. Returned even if we did not converge.
    """
    start_time = time.perf_counter()
-    b = -1j * omega * J
+    shape = [dd.size for dd in dxes[0]]
-    shape = [d.size for d in dxes[0]]
+    b = -1j * omega * numpy.array(J, copy=False)
    '''
        ** In this comment, I use the following notation:
@ -94,15 +100,16 @@ def cg_solver(omega: complex,
        We can accomplish all this simply by conjugating everything (except J) and
         reversing the order of L and R
    '''
    epsilon = numpy.array(epsilon, copy=False)
    if adjoint:
        # Conjugate everything
-        dxes = [[numpy.conj(d) for d in dd] for dd in dxes]
+        dxes = [[numpy.conj(dd) for dd in dds] for dds in dxes]
        omega = numpy.conj(omega)
        epsilon = numpy.conj(epsilon)
        if mu is not None:
            mu = numpy.conj(mu)
-    L, R = fdfd_tools.operators.e_full_preconditioners(dxes)
+    L, R = meanas.fdfd.operators.e_full_preconditioners(dxes)
    if adjoint:
        b_preconditioned = R @ b
@ -130,7 +137,7 @@ def cg_solver(omega: complex,
    rho = 1.0 + 0j
    errs = []
-    inv_dxes = [[load_field(1 / d) for d in dd] for dd in dxes]
+    inv_dxes = [[load_field(1 / numpy.array(dd, copy=False)) for dd in dds] for dds in dxes]
    oeps = load_field(-omega ** 2 * epsilon)
    Pl = load_field(L.diagonal())
    Pr = load_field(R.diagonal())
@ -138,17 +145,18 @@ def cg_solver(omega: complex,
    if mu is None:
        invm = load_field(numpy.array([]))
    else:
-        invm = load_field(1 / mu)
+        invm = load_field(1 / numpy.array(mu, copy=False))
        mu = numpy.array(mu, copy=False)
    if pec is None:
        gpec = load_field(numpy.array([]), dtype=numpy.int8)
    else:
-        gpec = load_field(pec.astype(bool), dtype=numpy.int8)
+        gpec = load_field(numpy.array(pec, dtype=bool, copy=False), dtype=numpy.int8)
    if pmc is None:
        gpmc = load_field(numpy.array([]), dtype=numpy.int8)
    else:
-        gpmc = load_field(pmc.astype(bool), dtype=numpy.int8)
+        gpmc = load_field(numpy.array(pmc, dtype=bool, copy=False), dtype=numpy.int8)
    '''
    Generate OpenCL kernels
@ -171,12 +179,13 @@ def cg_solver(omega: complex,
    _, err2 = rhoerr_step(r, [])
    b_norm = numpy.sqrt(err2)
-    print('b_norm check: ', b_norm)
+    logging.debug(f'b_norm check: {b_norm}')
    success = False
    for k in range(max_iters):
-        if verbose:
+        do_print = (k % 100 == 0)
-            print('[{:06d}] rho {:.4} alpha {:4.4}'.format(k, rho, alpha), end=' ')
+        if do_print:
            logger.debug(f'[{k:06d}] rho {rho:.4} alpha {alpha:4.4}')
        rho_prev = rho
        e = xr_step(x, p, r, v, alpha, [])
@ -184,8 +193,8 @@ def cg_solver(omega: complex,
        errs += [numpy.sqrt(err2) / b_norm]
-        if verbose:
+        if do_print:
-            print('err', errs[-1])
+            logger.debug(f'err {errs[-1]}')
        if errs[-1] < err_threshold:
            success = True
@ -196,7 +205,7 @@ def cg_solver(omega: complex,
        alpha = rho / dot(p, v, e)
        if k % 1000 == 0:
-            print(k)
+            logger.info(f'iteration {k}')
    '''
    Done solving
@ -210,18 +219,19 @@ def cg_solver(omega: complex,
        x = (Pr * x).get()
    if success:
-        print('Success', end='')
+        logger.info('Solve success')
    else:
-        print('Failure', end=', ')
+        logger.warning('Solve failure')
-    print(', {} iterations in {} sec: {} iterations/sec \
+    logger.info(f'{k} iterations in {time_elapsed} sec: {k / time_elapsed} iterations/sec')
-                  '.format(k, time_elapsed, k / time_elapsed))
+    logger.debug(f'final error {errs[-1]}')
-    print('final error', errs[-1])
+    logger.debug(f'overhead {start_time2 - start_time} sec')
    print('overhead {} sec'.format(start_time2 - start_time))
-    A0 = fdfd_tools.operators.e_full(omega, dxes, epsilon, mu).tocsr()
+    A0 = meanas.fdfd.operators.e_full(omega, dxes, epsilon, mu).tocsr()
    if adjoint:
        # Remember we conjugated all the contents of A earlier
        A0 = A0.T
-    print('Post-everything residual:', norm(A0 @ x - b) / norm(b))
+
    residual = norm(A0 @ x - b) / norm(b)
    logger.info(f'Post-everything residual: {residual}')
    return x
--- a/opencl_fdfd/ops.py
+++ b/opencl_fdfd/ops.py
@ -7,9 +7,11 @@ kernels for use by the other solvers.
 See kernels/ for any of the .cl files loaded in this file.
 """
-from typing import List, Callable
+from typing import List, Callable, Union, Type, Sequence, Optional, Tuple
 import logging
 import numpy
 from numpy.typing import NDArray, ArrayLike
 import jinja2
 import pyopencl
@ -18,6 +20,8 @@ from pyopencl.elementwise import ElementwiseKernel
 from pyopencl.reduction import ReductionKernel
 logger = logging.getLogger(__name__)
 # Create jinja2 env on module load
 jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
@ -25,14 +29,20 @@ jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
 operation = Callable[..., List[pyopencl.Event]]
-def type_to_C(float_type: numpy.float32 or numpy.float64) -> str:
+def type_to_C(
        float_type: Type,
        ) -> str:
    """
    Returns a string corresponding to the C equivalent of a numpy type.
-    :param float_type: numpy type: float32, float64, complex64, complex128
+    Args:
-    :return: string containing the corresponding C type (eg. 'double')
+        float_type: numpy type: float32, float64, complex64, complex128
    Returns:
        string containing the corresponding C type (eg. 'double')
    """
    types = {
        numpy.float16: 'half',
        numpy.float32: 'float',
        numpy.float64: 'double',
        numpy.complex64: 'cfloat_t',
@ -65,12 +75,13 @@ def ptrs(*args: str) -> List[str]:
    return [ctype + ' *' + s for s in args]
-def create_a(context: pyopencl.Context,
+def create_a(
-             shape: numpy.ndarray,
+        context: pyopencl.Context,
-             mu: bool = False,
+        shape: ArrayLike,
-             pec: bool = False,
+        mu: bool = False,
-             pmc: bool = False,
+        pec: bool = False,
-             ) -> operation:
+        pmc: bool = False,
        ) -> operation:
    """
    Return a function which performs (A @ p), where A is the FDFD wave equation for E-field.
@ -91,12 +102,15 @@ def create_a(context: pyopencl.Context,
     and returns a list of pyopencl.Event.
-    :param context: PyOpenCL context
+    Args:
-    :param shape: Dimensions of the E-field
+        context: PyOpenCL context
-    :param mu: False iff (mu == 1) everywhere
+        shape: Dimensions of the E-field
-    :param pec: False iff no PEC anywhere
+        mu: False iff (mu == 1) everywhere
-    :param pmc: False iff no PMC anywhere
+        pec: False iff no PEC anywhere
-    :return: Function for computing (A @ p)
+        pmc: False iff no PMC anywhere
    Returns:
        Function for computing (A @ p)
    """
    common_source = jinja_env.get_template('common.cl').render(shape=shape)
@ -110,41 +124,68 @@ def create_a(context: pyopencl.Context,
    Convert p to initial E (ie, apply right preconditioner and PEC)
    '''
    p2e_source = jinja_env.get_template('p2e.cl').render(pec=pec)
-    P2E_kernel = ElementwiseKernel(context,
+    P2E_kernel = ElementwiseKernel(
-                                   name='P2E',
+        context,
-                                   preamble=preamble,
+        name='P2E',
-                                   operation=p2e_source,
+        preamble=preamble,
-                                   arguments=', '.join(ptrs('E', 'p', 'Pr') + pec_arg))
+        operation=p2e_source,
        arguments=', '.join(ptrs('E', 'p', 'Pr') + pec_arg),
        )
    '''
    Calculate intermediate H from intermediate E
    '''
-    e2h_source = jinja_env.get_template('e2h.cl').render(mu=mu,
+    e2h_source = jinja_env.get_template('e2h.cl').render(
-                                                         pmc=pmc,
+        mu=mu,
-                                                         common_cl=common_source)
+        pmc=pmc,
-    E2H_kernel = ElementwiseKernel(context,
+        common_cl=common_source,
-                                   name='E2H',
+        )
-                                   preamble=preamble,
+    E2H_kernel = ElementwiseKernel(
-                                   operation=e2h_source,
+        context,
-                                   arguments=', '.join(ptrs('E', 'H', 'inv_mu') + pmc_arg + des))
+        name='E2H',
        preamble=preamble,
        operation=e2h_source,
        arguments=', '.join(ptrs('E', 'H', 'inv_mu') + pmc_arg + des),
        )
    '''
    Calculate final E (including left preconditioner)
    '''
-    h2e_source = jinja_env.get_template('h2e.cl').render(pec=pec,
+    h2e_source = jinja_env.get_template('h2e.cl').render(
-                                                         common_cl=common_source)
+        pec=pec,
-    H2E_kernel = ElementwiseKernel(context,
+        common_cl=common_source,
-                                   name='H2E',
+        )
-                                   preamble=preamble,
+    H2E_kernel = ElementwiseKernel(
-                                   operation=h2e_source,
+        context,
-                                   arguments=', '.join(ptrs('E', 'H', 'oeps', 'Pl') + pec_arg + dhs))
+        name='H2E',
        preamble=preamble,
        operation=h2e_source,
        arguments=', '.join(ptrs('E', 'H', 'oeps', 'Pl') + pec_arg + dhs),
        )
-    def spmv(E, H, p, idxes, oeps, inv_mu, pec, pmc, Pl, Pr, e):
+    def spmv(
            E: pyopencl.array.Array,
            H: pyopencl.array.Array,
            p: pyopencl.array.Array,
            idxes: Sequence[Sequence[pyopencl.array.Array]],
            oeps: pyopencl.array.Array,
            inv_mu: Optional[pyopencl.array.Array],
            pec: Optional[pyopencl.array.Array],
            pmc: Optional[pyopencl.array.Array],
            Pl: pyopencl.array.Array,
            Pr: pyopencl.array.Array,
            e: List[pyopencl.Event],
            ) -> List[pyopencl.Event]:
        e2 = P2E_kernel(E, p, Pr, pec, wait_for=e)
        e2 = E2H_kernel(E, H, inv_mu, pmc, *idxes[0], wait_for=[e2])
        e2 = H2E_kernel(E, H, oeps, Pl, pec, *idxes[1], wait_for=[e2])
        return [e2]
    logger.debug(f'Preamble: \n{preamble}')
    logger.debug(f'p2e: \n{p2e_source}')
    logger.debug(f'e2h: \n{e2h_source}')
    logger.debug(f'h2e: \n{h2e_source}')
    return spmv
@ -159,8 +200,11 @@ def create_xr_step(context: pyopencl.Context) -> operation:
    after waiting for all in the list e
    and returns a list of pyopencl.Event
-    :param context: PyOpenCL context
+    Args:
-    :return: Function for performing x and r updates
+        context: PyOpenCL context
    Returns:
        Function for performing x and r updates
    """
    update_xr_source = '''
    x[i] = add(x[i], mul(alpha, p[i]));
@ -169,19 +213,28 @@ def create_xr_step(context: pyopencl.Context) -> operation:
    xr_args = ', '.join(ptrs('x', 'p', 'r', 'v') + [ctype + ' alpha'])
-    xr_kernel = ElementwiseKernel(context,
+    xr_kernel = ElementwiseKernel(
-                                  name='XR',
+        context,
-                                  preamble=preamble,
+        name='XR',
-                                  operation=update_xr_source,
+        preamble=preamble,
-                                  arguments=xr_args)
+        operation=update_xr_source,
        arguments=xr_args,
        )
-    def xr_update(x, p, r, v, alpha, e):
+    def xr_update(
            x: pyopencl.array.Array,
            p: pyopencl.array.Array,
            r: pyopencl.array.Array,
            v: pyopencl.array.Array,
            alpha: complex,
            e: List[pyopencl.Event],
            ) -> List[pyopencl.Event]:
        return [xr_kernel(x, p, r, v, alpha, wait_for=e)]
    return xr_update
-def create_rhoerr_step(context: pyopencl.Context) -> operation:
+def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., Tuple[complex, complex]]:
    """
    Return a function
     ri_update(r, e)
@ -192,8 +245,11 @@ def create_rhoerr_step(context: pyopencl.Context) -> operation:
    after waiting for all pyopencl.Event in the list e
    and returns a list of pyopencl.Event
-    :param context: PyOpenCL context
+    Args:
-    :return: Function for performing x and r updates
+        context: PyOpenCL context
    Returns:
        Function for performing x and r updates
    """
    update_ri_source = '''
@ -205,16 +261,18 @@ def create_rhoerr_step(context: pyopencl.Context) -> operation:
    # Use a vector type (double3) to make the reduction simpler
    ri_dtype = pyopencl.array.vec.double3
-    ri_kernel = ReductionKernel(context,
+    ri_kernel = ReductionKernel(
-                                name='RHOERR',
+        context,
-                                preamble=preamble,
+        name='RHOERR',
-                                dtype_out=ri_dtype,
+        preamble=preamble,
-                                neutral='(double3)(0.0, 0.0, 0.0)',
+        dtype_out=ri_dtype,
-                                map_expr=update_ri_source,
+        neutral='(double3)(0.0, 0.0, 0.0)',
-                                reduce_expr='a+b',
+        map_expr=update_ri_source,
-                                arguments=ctype + ' *r')
+        reduce_expr='a+b',
        arguments=ctype + ' *r',
        )
-    def ri_update(r, e):
+    def ri_update(r: pyopencl.array.Array, e: List[pyopencl.Event]) -> Tuple[complex, complex]:
        g = ri_kernel(r, wait_for=e).astype(ri_dtype).get()
        rr, ri, ii = [g[q] for q in 'xyz']
        rho = rr + 2j * ri - ii
@ -234,48 +292,66 @@ def create_p_step(context: pyopencl.Context) -> operation:
    after waiting for all pyopencl.Event in the list e
    and returns a list of pyopencl.Event
-    :param context: PyOpenCL context
+    Args:
-    :return: Function for performing the p update
+        context: PyOpenCL context
    Returns:
        Function for performing the p update
    """
    update_p_source = '''
    p[i] = add(r[i], mul(beta, p[i]));
    '''
    p_args = ptrs('p', 'r') + [ctype + ' beta']
-    p_kernel = ElementwiseKernel(context,
+    p_kernel = ElementwiseKernel(
-                                 name='P',
+        context,
-                                 preamble=preamble,
+        name='P',
-                                 operation=update_p_source,
+        preamble=preamble,
-                                 arguments=', '.join(p_args))
+        operation=update_p_source,
        arguments=', '.join(p_args),
        )
-    def p_update(p, r, beta, e):
+    def p_update(
            p: pyopencl.array.Array,
            r: pyopencl.array.Array,
            beta: complex,
            e: List[pyopencl.Event]) -> List[pyopencl.Event]:
        return [p_kernel(p, r, beta, wait_for=e)]
    return p_update
-def create_dot(context: pyopencl.Context) -> operation:
+def create_dot(context: pyopencl.Context) -> Callable[..., complex]:
    """
    Return a function for performing the dot product
     p @ v
    with the signature
-     dot(p, v, e) -> float
+     dot(p, v, e) -> complex
-    :param context: PyOpenCL context
+    Args:
-    :return: Function for performing the dot product
+        context: PyOpenCL context
    Returns:
        Function for performing the dot product
    """
    dot_dtype = numpy.complex128
-    dot_kernel = ReductionKernel(context,
+    dot_kernel = ReductionKernel(
-                                 name='dot',
+        context,
-                                 preamble=preamble,
+        name='dot',
-                                 dtype_out=dot_dtype,
+        preamble=preamble,
-                                 neutral='zero',
+        dtype_out=dot_dtype,
-                                 map_expr='mul(p[i], v[i])',
+        neutral='zero',
-                                 reduce_expr='add(a, b)',
+        map_expr='mul(p[i], v[i])',
-                                 arguments=ptrs('p', 'v'))
+        reduce_expr='add(a, b)',
        arguments=ptrs('p', 'v'),
        )
-    def dot(p, v, e):
+    def dot(
            p: pyopencl.array.Array,
            v: pyopencl.array.Array,
            e: List[pyopencl.Event],
            ) -> complex:
        g = dot_kernel(p, v, wait_for=e)
        return g.get()
@ -296,8 +372,11 @@ def create_a_csr(context: pyopencl.Context) -> operation:
    The function waits on all the pyopencl.Event in e before running, and returns
     a list of pyopencl.Event.
-    :param context: PyOpenCL context
+    Args:
-    :return: Function for sparse (M @ v) operation where M is in CSR format
+        context: PyOpenCL context
    Returns:
        Function for sparse (M @ v) operation where M is in CSR format
    """
    spmv_source = '''
    int start = m_row_ptr[i];
@ -318,13 +397,20 @@ def create_a_csr(context: pyopencl.Context) -> operation:
    m_args = 'int *m_row_ptr, int *m_col_ind, ' + ctype + ' *m_data'
    v_in_args = ctype + ' *v_in'
-    spmv_kernel = ElementwiseKernel(context,
+    spmv_kernel = ElementwiseKernel(
-                                    name='csr_spmv',
+        context,
-                                    preamble=preamble,
+        name='csr_spmv',
-                                    operation=spmv_source,
+        preamble=preamble,
-                                    arguments=', '.join((v_out_args, m_args, v_in_args)))
+        operation=spmv_source,
        arguments=', '.join((v_out_args, m_args, v_in_args)),
        )
-    def spmv(v_out, m, v_in, e):
+    def spmv(
            v_out,
            m,
            v_in,
            e: List[pyopencl.Event],
            ) -> List[pyopencl.Event]:
        return [spmv_kernel(v_out, m.row_ptr, m.col_ind, m.data, v_in, wait_for=e)]
    return spmv
--- a/opencl_fdfd/py.typed
+++ b/opencl_fdfd/py.typed
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,48 @@
 [build-system]
 requires = ["hatchling"]
 build-backend = "hatchling.build"
 [project]
 name = "opencl_fdfd"
 description = "OpenCL FDFD solver"
 readme = "README.md"
 license = { file = "LICENSE.md" }
 authors = [
    { name="Jan Petykiewicz", email="jan@mpxd.net" },
    ]
 homepage = "https://mpxd.net/code/jan/opencl_fdfd"
 repository = "https://mpxd.net/code/jan/opencl_fdfd"
 keywords = [
    "FDFD",
    "finite",
    "difference",
    "frequency",
    "domain",
    "simulation",
    "optics",
    "electromagnetic",
    "dielectric",
    "PML",
    "solver",
    "FDTD",
    ]
 classifiers = [
    "Programming Language :: Python :: 3",
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Intended Audience :: Manufacturing",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: GNU Affero General Public License v3",
    "Topic :: Scientific/Engineering",
    ]
 requires-python = ">=3.8"
 dynamic = ["version"]
 dependencies = [
    "numpy~=1.21",
    "pyopencl",
    "jinja2",
    "meanas>=0.5",
    ]
 [tool.hatch.version]
 path = "opencl_fdfd/__init__.py"
--- a/setup.py
+++ b/setup.py
@ -1,21 +0,0 @@
 #!/usr/bin/env python
 from setuptools import setup, find_packages
 setup(name='opencl_fdfd',
      version='0.3',
      description='Opencl FDFD solver',
      author='Jan Petykiewicz',
      author_email='anewusername@gmail.com',
      url='https://mpxd.net/gogs/jan/opencl_fdfd',
      packages=find_packages(),
      install_requires=[
            'numpy',
            'pyopencl',
            'jinja2',
            'fdfd_tools>=0.3',
      ],
      extras_require={
      },
      )
Author	SHA1	Message	Date
jan	c42ce49983	add half-precision to type_to_C	1 year ago
Jan Petykiewicz	b8ea859106	bump version to 0.4	2 years ago
Jan Petykiewicz	3d8054ba40	use f-strings everywhere	2 years ago
Jan Petykiewicz	efeb29479b	improve type annotations, formatting, comment styles	2 years ago
Jan Petykiewicz	81bb1dd2c0	add caches to .gitignore	2 years ago
Jan Petykiewicz	e41a71ce6f	move to hatch-based build	2 years ago
Jan Petykiewicz	cba31bf081	use new email	3 years ago
Jan Petykiewicz	77f53affe7	use VERSION.py instead of importing package before it's installed	3 years ago
Jan Petykiewicz	0ebfa030c4	README fixes	3 years ago
Jan Petykiewicz	5861767a00	depend on meanas instad of fdfd_tools	3 years ago
jan	792b161753	avoid importing the package before its installed...	4 years ago
jan	66c30e6eab	move from fdfd_tools to meanas	4 years ago
jan	2130b015fd	readme updates	4 years ago
Jan Petykiewicz	4b798893bc	Use python3 for setup	6 years ago
Jan Petykiewicz	3cd26265bd	Use readme as long_description	6 years ago
Jan Petykiewicz	8360f98395	Move version string into module	6 years ago
Jan Petykiewicz	b62cd6e867	move code to new location	6 years ago
jan	c96d367502	fixup package info	7 years ago
Jan Petykiewicz	1d5e7ff3bb	minor cleanup of generated source	7 years ago
Jan Petykiewicz	4ffa5e4a66	use logging package for output, and remove 'verbose' options	7 years ago