autodetect uniform dxes, and fix nonuniform dxes setup

update deps
add ruff and mypy configs
2025-01-31 20:17:15 -08:00 · 2024-07-30 23:19:31 -07:00 · 2024-07-30 23:19:21 -07:00 · 2024-07-30 23:19:07 -07:00 · 2024-07-30 23:18:54 -07:00 · 2024-01-10 20:57:40 -08:00
14 changed files with 905 additions and 552 deletions
--- a/.flake8
+++ b/.flake8
@ -0,0 +1,29 @@
+[flake8]
+ignore =
+    # E501 line too long
+    E501,
+    # W391 newlines at EOF
+    W391,
+    # E241 multiple spaces after comma
+    E241,
+    # E302 expected 2 newlines
+    E302,
+    # W503 line break before binary operator (to be deprecated)
+    W503,
+    # E265 block comment should start with '# '
+    E265,
+    # E123 closing bracket does not match indentation of opening bracket's line
+    E123,
+    # E124 closing bracket does not match visual indentation
+    E124,
+    # E221 multiple spaces before operator
+    E221,
+    # E201 whitespace after '['
+    E201,
+    # E741 ambiguous variable name 'I'
+    E741,
+
+
+per-file-ignores =
+    # F401 import without use
+    */__init__.py: F401,
--- a/.gitignore
+++ b/.gitignore
@ -1,9 +1,11 @@
 .idea/
 *.h5

-__pycache__
+__pycache__/
 *.py[cod]
 build/
 dist/
 *.egg-info/

+.pytest_cache/
+.mypy_cache/
--- a/fdtd.py
+++ b/fdtd.py
@ -7,6 +7,7 @@ See main() for simulation setup.
 import sys
 import time
 import logging
+import pyopencl

 import numpy
 import lzma
@ -16,7 +17,7 @@ from opencl_fdtd import Simulation
 from masque import Pattern, shapes
 import gridlock
 import pcgen
-import fdfd_tools
+import meanas


 __author__ = 'Jan Petykiewicz'
@ -29,60 +30,69 @@ def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern:
    """
    Generate a masque.Pattern object containing a perturbed L3 cavity.

-    :param a: Lattice constant.
-    :param radius: Hole radius, in units of a (lattice constant).
-    :param kwargs: Keyword arguments:
-        hole_dose, trench_dose, hole_layer, trench_layer: Shape properties for Pattern.
-                Defaults *_dose=1, hole_layer=0, trench_layer=1.
-        shifts_a, shifts_r: passed to pcgen.l3_shift; specifies lattice constant (1 -
-                multiplicative factor) and radius (multiplicative factor) for shifting
-                holes adjacent to the defect (same row). Defaults are 0.15 shift for
-                first hole, 0.075 shift for third hole, and no radius change.
+    Args:
+        a: Lattice constant.
+        radius: Hole radius, in units of a (lattice constant).
+        hole_dose: Dose for all holes. Default 1.
+        trench_dose: Dose for undercut trenches. Default 1.
+        hole_layer: Layer for holes. Default (0, 0).
+        trench_layer: Layer for undercut trenches. Default (1, 0).
+        shifts_a: passed to pcgen.l3_shift
+        shifts_r: passed to pcgen.l3_shift
        xy_size: [x, y] number of mirror periods in each direction; total size is
-                2 * n + 1 holes in each direction. Default [10, 10].
+                2 * n + 1 holes in each direction. Default (10, 10).
        perturbed_radius: radius of holes perturbed to form an upwards-driected beam
                (multiplicative factor). Default 1.1.
        trench width: Width of the undercut trenches. Default 1.2e3.
-    :return: masque.Pattern object containing the L3 design
+
+    Returns:
+        `masque.Pattern` object containing the L3 design
    """

-    default_args = {'hole_dose':    1,
-                    'trench_dose':  1,
-                    'hole_layer':   0,
-                    'trench_layer': 1,
-                    'shifts_a':     (0.15, 0, 0.075),
-                    'shifts_r':     (1.0, 1.0, 1.0),
-                    'xy_size':      (10, 10),
-                    'perturbed_radius': 1.1,
-                    'trench_width': 1.2e3,
-                    }
+    default_args = {
+        'hole_dose':    1,
+        'trench_dose':  1,
+        'hole_layer':   0,
+        'trench_layer': 1,
+        'shifts_a':     (0.15, 0, 0.075),
+        'shifts_r':     (1.0, 1.0, 1.0),
+        'xy_size':      (10, 10),
+        'perturbed_radius': 1.1,
+        'trench_width': 1.2e3,
+        }
    kwargs = {**default_args, **kwargs}

-    xyr = pcgen.l3_shift_perturbed_defect(mirror_dims=kwargs['xy_size'],
-                                          perturbed_radius=kwargs['perturbed_radius'],
-                                          shifts_a=kwargs['shifts_a'],
-                                          shifts_r=kwargs['shifts_r'])
+    xyr = pcgen.l3_shift_perturbed_defect(
+        mirror_dims=kwargs['xy_size'],
+        perturbed_radius=kwargs['perturbed_radius'],
+        shifts_a=kwargs['shifts_a'],
+        shifts_r=kwargs['shifts_r'],
+        )
    xyr *= a
    xyr[:, 2] *= radius

    pat = Pattern()
-    pat.name = 'L3p-a{:g}r{:g}rp{:g}'.format(a, radius, kwargs['perturbed_radius'])
+    pat.name = f'L3p-a{a:g}r{radius:g}rp{kwargs["perturbed_radius"]:g}'
    pat.shapes += [shapes.Circle(radius=r, offset=(x, y),
                                 dose=kwargs['hole_dose'],
                                 layer=kwargs['hole_layer'])
                   for x, y, r in xyr]

    maxes = numpy.max(numpy.fabs(xyr), axis=0)
-    pat.shapes += [shapes.Polygon.rectangle(
-        lx=(2 * maxes[0]), ly=kwargs['trench_width'],
-        offset=(0, s * (maxes[1] + a + kwargs['trench_width'] / 2)),
-        dose=kwargs['trench_dose'], layer=kwargs['trench_layer'])
-                   for s in (-1, 1)]
+    pat.shapes += [
+        shapes.Polygon.rectangle(
+            lx=(2 * maxes[0]),
+            ly=kwargs['trench_width'],
+            offset=(0, s * (maxes[1] + a + kwargs['trench_width'] / 2)),
+            dose=kwargs['trench_dose'],
+            layer=kwargs['trench_layer'],
+            )
+        for s in (-1, 1)]
    return pat


 def main():
-    max_t = 8000            # number of timesteps
+    max_t = 4000            # number of timesteps

    dx = 25                 # discretization (nm/cell)
    pml_thickness = 8       # (number of cells)
@ -101,36 +111,43 @@ def main():
    n_air = 1.0   # air

    # Half-dimensions of the simulation grid
-    xy_max = (xy_size + 1) * a * [1, numpy.sqrt(3)/2]
-    z_max = 1.6 * a
-    xyz_max = numpy.hstack((xy_max, z_max)) + pml_thickness * dx
-
-    # Coordinates of the edges of the cells.
-    #  The fdtd package can only do square grids at the moment.
-    half_edge_coords = [numpy.arange(dx/2, m + dx, step=dx) for m in xyz_max]
-    edge_coords = [numpy.hstack((-h[::-1], h)) for h in half_edge_coords]
+#    xy_max = (xy_size + 1) * a * [1, numpy.sqrt(3)/2]
+#    z_max = 1.6 * a
+#    xyz_max = numpy.hstack((xy_max, z_max)) + pml_thickness * dx
+#
+#    # Coordinates of the edges of the cells.
+#    #  The fdtd package can only do square grids at the moment.
+#    half_edge_coords = [numpy.arange(dx/2, m + dx, step=dx) for m in xyz_max]
+#    edge_coords = [numpy.hstack((-h[::-1], h)) for h in half_edge_coords]
+    edge_coords = [numpy.arange(-100.5, 101), numpy.arange(-1, 1), numpy.arange(-100.5, 101)]
+#    edge_coords = [numpy.arange(-100.5, 101), numpy.arange(-100.5, 101), numpy.arange(-1, 1)]

    # #### Create the grid, mask, and draw the device ####
-    grid = gridlock.Grid(edge_coords, initial=n_air**2, num_grids=3)
-    grid.draw_slab(surface_normal=gridlock.Direction.z,
-                   center=[0, 0, 0],
-                   thickness=th,
-                   eps=n_slab**2)
-    mask = perturbed_l3(a, r)
+    grid = gridlock.Grid(edge_coords)
+    epsilon = grid.allocate(n_air**2)
+#    grid.draw_slab(epsilon,
+#                   surface_normal=2,
+#                   center=[0, 0, 0],
+#                   thickness=th,
+#                   eps=n_slab**2)
+#    mask = perturbed_l3(a, r)
+#
+#    grid.draw_polygons(epsilon,
+#                       surface_normal=2,
+#                       center=[0, 0, 0],
+#                       thickness=2 * th,
+#                       eps=n_air**2,
+#                       polygons=mask.as_polygons())

-    grid.draw_polygons(surface_normal=gridlock.Direction.z,
-                       center=[0, 0, 0],
-                       thickness=2 * th,
-                       eps=n_air**2,
-                       polygons=mask.as_polygons())
-
-    logger.info('grid shape: {}'.format(grid.shape))
+    logger.info(f'grid shape: {grid.shape}')
    # #### Create the simulation grid ####
+#    pmls = [{'axis': a, 'polarity': p, 'thickness': pml_thickness}
+#            for a in 'xyz' for p in 'np']
    pmls = [{'axis': a, 'polarity': p, 'thickness': pml_thickness}
-            for a in 'xyz' for p in 'np']
+            for a in 'xz' for p in 'np']
    #bloch = [{'axis': a, 'real': 1, 'imag': 0} for a in 'x']
    bloch = []
-    sim = Simulation(grid.grids, do_poynting=True, pmls=pmls, bloch_boundaries=bloch)
+    sim = Simulation(epsilon, do_poynting=True, pmls=pmls, bloch_boundaries=bloch)

    # Source parameters and function
    w = 2 * numpy.pi * dx / wl
@ -155,36 +172,88 @@ def main():
            f.write('\n=====================G=====================\n')
            f.write(sim.sources['G'])

+
+    planes = numpy.empty((max_t, 4))
+    planes2 = numpy.empty((max_t, 4))
+    Ectr = numpy.empty(max_t)
+    u = numpy.empty(max_t)
+    ui = numpy.empty(max_t)
    # #### Run a bunch of iterations ####
    # event = sim.whatever([prev_event]) indicates that sim.whatever should be queued
    #  immediately and run once prev_event is finished.
    start = time.perf_counter()
    for t in range(max_t):
        e = sim.update_E([])
-        if bloch:
-            e = sim.update_F([e])
+#        if bloch:
+#            e = sim.update_F([e])
        e.wait()

        ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + numpy.prod(grid.shape)
-        sim.E[ind] += field_source(t)
+#        sim.E[ind] += field_source(t)
+        if t == 2:
+            sim.E[ind] += 1e6
+
+        h_old = sim.H.copy()
+
        e = sim.update_H([])
-        if bloch:
-            e = sim.update_G([e])
-        if sim.update_S:
-            e = sim.update_S([e])
+#        if bloch:
+#            e = sim.update_G([e])
        e.wait()

+        S = sim.S.get().reshape(epsilon.shape) * sim.dt * dx * dx *dx
+        m = 30
+        planes[t] = (
+            S[0][+pml_thickness+2, :, pml_thickness+3:-pml_thickness-3].sum(),
+            S[0][-pml_thickness-2, :, pml_thickness+3:-pml_thickness-3].sum(),
+            S[2][pml_thickness+2:-pml_thickness-2, :, +pml_thickness+2].sum(),
+            S[2][pml_thickness+2:-pml_thickness-2, :, -pml_thickness-2].sum(),
+            )
+        planes2[t] = (
+            S[0][grid.shape[0]//2-1, 0, grid.shape[2]//2].sum(),
+            S[0][grid.shape[0]//2  , 0, grid.shape[2]//2].sum(),
+            S[2][grid.shape[0]//2  , 0, grid.shape[2]//2-1].sum(),
+            S[2][grid.shape[0]//2  , 0, grid.shape[2]//2].sum(),
+            )
+
+#        planes[t] = (
+#            S[0][+pml_thickness+m, pml_thickness+m+1:-pml_thickness-m, :].sum(),
+#            S[0][-pml_thickness-m, pml_thickness+m+1:-pml_thickness-m, :].sum(),
+#            S[1][pml_thickness+1+m:-pml_thickness-m, +pml_thickness+m, :].sum(),
+#            S[1][pml_thickness+1+m:-pml_thickness-m, -pml_thickness-m, :].sum(),
+#            )
+#        planes2[t] = (
+#            S[0][grid.shape[0]//2-1, grid.shape[1]//2  , 0].sum(),
+#            S[0][grid.shape[0]//2  , grid.shape[1]//2  , 0].sum(),
+#            S[1][grid.shape[0]//2  , grid.shape[1]//2-1, 0].sum(),
+#            S[1][grid.shape[0]//2  , grid.shape[1]//2  , 0].sum(),
+#            )
+        Ectr[t] = sim.E[ind].get()
+        u[t] = pyopencl.array.sum(sim.E * sim.E * sim.eps + h_old * sim.H).get() * dx * dx * dx
+        ui[t] = (sim.E * sim.E * sim.eps + h_old * sim.H).reshape(epsilon.shape).get()[:, pml_thickness+m:-pml_thickness-m, :,
+                                                                                          pml_thickness+m:-pml_thickness-m].sum() * dx * dx * dx
+#        ui[t] = (sim.E * sim.E * sim.eps + h_old * sim.H).reshape(epsilon.shape).get()[:, pml_thickness+m:-pml_thickness-m,
+#                                                                                          pml_thickness+m:-pml_thickness-m, :].sum() * dx * dx * dx
+
        if t % 100 == 0:
-            logger.info('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start)))
+            avg = (t + 1) / (time.perf_counter() - start)
+            logger.info(f'iteration {t}: average {avg} iterations per sec')
            sys.stdout.flush()

    with lzma.open('saved_simulation', 'wb') as f:
        def unvec(f):
-            return fdfd_tools.unvec(f, grid.shape)
+            return meanas.fdmath.unvec(f, grid.shape)
        d = {
            'grid': grid,
+            'epsilon': epsilon,
            'E': unvec(sim.E.get()),
            'H': unvec(sim.H.get()),
+            'dt': sim.dt,
+            'dx': dx,
+            'planes': planes,
+            'planes2': planes2,
+            'Ectr': Ectr,
+            'u': u,
+            'ui': ui,
            }
        if sim.S is not None:
            d['S'] = unvec(sim.S.get())
--- a/opencl_fdtd/LICENSE.md
+++ b/opencl_fdtd/LICENSE.md
@ -0,0 +1 @@
+../LICENSE.md
--- a/opencl_fdtd/README.md
+++ b/opencl_fdtd/README.md
@ -0,0 +1 @@
+../README.md
--- a/opencl_fdtd/init.py
+++ b/opencl_fdtd/init.py
@ -1,5 +1,8 @@
-from .simulation import Simulation, type_to_C
+from .simulation import (
+    Simulation as Simulation,
+    type_to_C as type_to_C,
+    )

 __author__ = 'Jan Petykiewicz'
-
-version = '0.4'
+__version__ = '0.4'
+version = __version__
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@ -92,7 +92,7 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
    const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
    dH{{v ~ r}} *= p{{r}}2e{{p}}[ir];
    dH{{u ~ r}} *= p{{r}}2e{{p}}[ir];
-    {{psi ~ u}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1e{{p}}[ir] * dH{{v ~ r}};
+    {{psi ~ u}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1e{{p}}[ir] * dH{{v ~ r}}; //note u uses v
    {{psi ~ v}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1e{{p}}[ir] * dH{{u ~ r}};
    pE{{u}}i {{se}}= {{psi ~ u}}[ip];
    pE{{v}}i {{sh}}= {{psi ~ v}}[ip];
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@ -6,13 +6,14 @@
 *   common_header: Rendered contents of common.cl
 *   pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
 *      axes, polarities, and thicknesses.
- *   do_poynting: Whether to precalculate poynting vector components (boolean)
+ *   do_poynting: Whether to calculate poynting vector (boolean)
+ *   do_poynting_halves: Whether to calculate half-step poynting vectors (boolean)
 *   uniform_dx: If grid is uniform, uniform_dx should be the grid spacing.
 *      Otherwise, uniform_dx should be False and [inv_de{xyz}] arrays must be supplied as
 *      OpenCL args.
 *
 *  OpenCL args:
- *   E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS]
+ *   E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [S], [S0, S1]
 */

 {{common_header}}
@ -52,24 +53,7 @@ if ({{r}} == s{{r}} - 1) {
    dE{{u ~ r}} = bloch_re * dE{{u ~ r}} + bloch_im * (F{{u}}[i + p{{u}}] - F{{u}}[i]);
    dE{{v ~ r}} = bloch_re * dE{{v ~ r}} + bloch_im * (F{{v}}[i + p{{v}}] - F{{v}}[i]);
 }
-{% endfor -%}
-
-{%- if do_poynting %}
-
-
-/*
- *  Precalculate averaged E
- */
-ftype aExy = Ex[i + py] + Ex[i];
-ftype aExz = Ex[i + pz] + Ex[i];
-
-ftype aEyx = Ey[i + px] + Ey[i];
-ftype aEyz = Ey[i + pz] + Ey[i];
-
-ftype aEzx = Ez[i + px] + Ez[i];
-ftype aEzy = Ez[i + py] + Ez[i];
-{%- endif %}
-
+{%- endfor %}



@ -118,7 +102,7 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
 /*
 *  Update H
 */
-{% if do_poynting -%}
+{% if do_poynting or do_poynting_halves -%}
 // Save old H for averaging
 ftype Hx_old = Hx[i];
 ftype Hy_old = Hy[i];
@ -131,25 +115,40 @@ Hy[i] -= dt * (dExz - dEzx - pHyi);
 Hz[i] -= dt * (dEyx - dExy - pHzi);

 {% if do_poynting -%}
-// Average H across timesteps
-ftype aHxt = Hx[i] + Hx_old;
-ftype aHyt = Hy[i] + Hy_old;
-ftype aHzt = Hz[i] + Hz_old;
-
 /*
- *  Calculate unscaled S components at H locations
+ *  Calculate unscaled S components at ??? locations
+ *  //TODO: document S locations and types
 */
-__global ftype *oSxy = oS + 0 * field_size;
-__global ftype *oSyz = oS + 1 * field_size;
-__global ftype *oSzx = oS + 2 * field_size;
-__global ftype *oSxz = oS + 3 * field_size;
-__global ftype *oSyx = oS + 4 * field_size;
-__global ftype *oSzy = oS + 5 * field_size;
+__global ftype *Sx = S + XX;
+__global ftype *Sy = S + YY;
+__global ftype *Sz = S + ZZ;

-oSxy[i] = aEyx * aHzt;
-oSxz[i] = -aEzx * aHyt;
-oSyz[i] = aEzy * aHxt;
-oSyx[i] = -aExy * aHzt;
-oSzx[i] = aExz * aHyt;
-oSzy[i] = -aEyz * aHxt;
+// Sum old and new H
+ftype sHxt = Hx[i] + Hx_old;
+ftype sHyt = Hy[i] + Hy_old;
+ftype sHzt = Hz[i] + Hz_old;
+
+Sx[i] = Ey[i + px] * sHzt - Ez[i + px] * sHyt;
+Sy[i] = Ez[i + py] * sHxt - Ex[i + py] * sHzt;
+Sz[i] = Ex[i + pz] * sHyt - Ey[i + pz] * sHxt;
+{%- endif -%}
+
+{% if do_poynting_halves -%}
+/*
+ *  Calculate unscaled S components at ??? locations
+ *  //TODO: document S locations and types
+ */
+__global ftype *Sx0 = S0 + XX;
+__global ftype *Sy0 = S0 + YY;
+__global ftype *Sz0 = S0 + ZZ;
+__global ftype *Sx1 = S1 + XX;
+__global ftype *Sy1 = S1 + YY;
+__global ftype *Sz1 = S1 + ZZ;
+
+Sx0[i] = Ey[i + px] * Hz_old - Ez[i + px] * Hy_old;
+Sy0[i] = Ez[i + py] * Hx_old - Ex[i + py] * Hz_old;
+Sz0[i] = Ex[i + pz] * Hy_old - Ey[i + pz] * Hx_old;
+Sx1[i] = Ey[i + px] * Hz[i] - Ez[i + px] * Hy[i];
+Sy1[i] = Ez[i + py] * Hx[i] - Ex[i + py] * Hz[i];
+Sz1[i] = Ex[i + pz] * Hy[i] - Ey[i + pz] * Hx[i];
 {%- endif -%}
--- a/opencl_fdtd/kernels/update_s.cl
+++ b/opencl_fdtd/kernels/update_s.cl
@ -1,36 +0,0 @@
-/*
- *  Update E-field, including any PMLs.
- *
- *  Template parameters:
- *   common_header: Rendered contents of common.cl
- *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
- *   pml_thickness: Number of cells (integer)
- *
- *  OpenCL args:
- *   E, H, dt, S, oS
- */
-
-{{common_header}}
-
-//////////////////////////////////////////////////////////////////////
-
-
-/*
- * Calculate S from oS (pre-calculated components)
- */
-__global ftype *Sx = S + XX;
-__global ftype *Sy = S + YY;
-__global ftype *Sz = S + ZZ;
-
-// Use unscaled S components from H locations
-__global ftype *oSxy = oS + 0 * field_size;
-__global ftype *oSyz = oS + 1 * field_size;
-__global ftype *oSzx = oS + 2 * field_size;
-__global ftype *oSxz = oS + 3 * field_size;
-__global ftype *oSyx = oS + 4 * field_size;
-__global ftype *oSzy = oS + 5 * field_size;
-
-ftype s_factor = dt * 0.125;
-Sx[i] = (oSxy[i] + oSxz[i] + oSxy[i + my] + oSxz[i + mz]) * s_factor;
-Sy[i] = (oSyz[i] + oSyx[i] + oSyz[i + mz] + oSyx[i + mx]) * s_factor;
-Sz[i] = (oSzx[i] + oSzy[i] + oSzx[i + mx] + oSzy[i + my]) * s_factor;
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@ -2,28 +2,35 @@
 Class for constructing and holding the basic FDTD operations and fields
 """

-from typing import List, Dict, Callable
-from collections import OrderedDict
+from collections.abc import Callable, Sequence
 import numpy
+from numpy.typing import NDArray
+from numpy import floating, complexfloating
 import jinja2
 import warnings
+import logging

 import pyopencl
 import pyopencl.array
 from pyopencl.elementwise import ElementwiseKernel

-from fdfd_tools import vec
+from meanas.fdmath import vec


-__author__ = 'Jan Petykiewicz'
+logger = logging.getLogger(__name__)


 # Create jinja2 env on module load
-jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
+jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__.split('.')[0], 'kernels'))


-class Simulation(object):
-    """
+class FDTDError(Exception):
+    """ Custom exception for opencl_fdtd """
+    pass
+
+
+class Simulation:
+    r"""
    Constructs and holds the basic FDTD operations and related fields

    After constructing this object, call the (update_E, update_H, update_S) members
@ -32,7 +39,7 @@ class Simulation(object):
        pmls = [{'axis': a, 'polarity': p} for a in 'xyz' for p in 'np']
        sim = Simulation(grid.grids, do_poynting=True, pmls=pmls)
        with open('sources.c', 'w') as f:
-            f.write('{}'.format(sim.sources))
+            f.write(f'{sim.sources}')

        for t in range(max_t):
            sim.update_E([]).wait()
@ -48,7 +55,7 @@ class Simulation(object):
            event.wait()

            with lzma.open('saved_simulation', 'wb') as f:
-                dill.dump(fdfd_tools.unvec(sim.E.get(), grid.shape), f)
+                dill.dump(meanas.fdmath.unvec(sim.E.get(), grid.shape), f)

    Code in the form
        event2 = sim.update_H([event0, event1])
@ -56,70 +63,70 @@ class Simulation(object):
     event0 and event1 to occur (i.e. previous operations to finish) before starting execution.
     event2 can then be used to prepare further operations to be run after update_H.
    """
-    E = None            # type: pyopencl.array.Array
-    H = None            # type: pyopencl.array.Array
-    S = None            # type: pyopencl.array.Array
-    eps = None          # type: pyopencl.array.Array
-    dt = None           # type: float
-    inv_dxes = None     # type: List[pyopencl.array.Array]
+    E: pyopencl.array.Array
+    H: pyopencl.array.Array
+    S: pyopencl.array.Array
+    eps: pyopencl.array.Array
+    dt: float
+    inv_dxes: list[pyopencl.array.Array]

-    arg_type = None     # type: numpy.float32 or numpy.float64
+    arg_type: type

-    context = None      # type: pyopencl.Context
-    queue = None        # type: pyopencl.CommandQueue
+    context: pyopencl.Context
+    queue: pyopencl.CommandQueue

-    update_E = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
-    update_H = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
-    update_S = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
-    update_J = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
-    sources = None      # type: Dict[str, str]
+    update_E: Callable[[list[pyopencl.Event]], pyopencl.Event]
+    update_H: Callable[[list[pyopencl.Event]], pyopencl.Event]
+    update_S: Callable[[list[pyopencl.Event]], pyopencl.Event]
+    update_J: Callable[[list[pyopencl.Event]], pyopencl.Event]
+    sources: dict[str, str]

-    def __init__(self,
-                 epsilon: List[numpy.ndarray],
-                 pmls: List[Dict[str, int or float]],
-                 bloch_boundaries: List[Dict[str, int or float]] = (),
-                 dxes: List[List[numpy.ndarray]] or float = None,
-                 dt: float = None,
-                 initial_fields: Dict[str, List[numpy.ndarray]] = None,
-                 context: pyopencl.Context = None,
-                 queue: pyopencl.CommandQueue = None,
-                 float_type: numpy.float32 or numpy.float64 = numpy.float32,
-                 do_poynting: bool = True,
-                 do_fieldsrc: bool = False):
+    def __init__(
+            self,
+            epsilon: NDArray,
+            pmls: Sequence[dict[str, float]],
+            bloch_boundaries: Sequence[dict[str, float]] = (),
+            dxes: list[list[NDArray]] | float | None = None,
+            dt: float | None = None,
+            initial_fields: dict[str, NDArray] | None = None,
+            context: pyopencl.Context | None = None,
+            queue: pyopencl.CommandQueue | None = None,
+            float_type: type = numpy.float32,
+            do_poynting: bool = True,
+            do_fieldsrc: bool = False,
+            ) -> None:
        """
        Initialize the simulation.

-        :param epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray
+        Args:
+            epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray
                spanning the simulation domain. Relative epsilon is used.
-        :param pmls: List of dicts with keys:
-            'axis': One of 'x', 'y', 'z'.
-            'direction': One of 'n', 'p'.
-            'thickness': Number of layers, default 8.
-            'epsilon_eff': Effective epsilon to match to. Default 1.0.
-            'mu_eff': Effective mu to match to. Default 1.0.
-            'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6.
-            'm': Polynomial grading exponent. Default 3.5.
-            'ma': Exponent for alpha. Default 1.
-        :param bloch_boundaries: List of dicts with keys:
-            'axis': One of 'x', 'y', 'z'.
-            'real': Real part of bloch phase factor (i.e. real(exp(i * phase)))
-            'imag': Imaginary part of bloch phase factor (i.e. imag(exp(i * phase)))
-        :param dt: Time step. Default is min(dxes) * .99/sqrt(3).
-        :param initial_fields: Dict with optional keys ('E', 'H', 'F', 'G') containing initial values for the
-            specified fields (default is 0 everywhere). Fields have same format as epsilon.
-        :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
-        :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
-        :param float_type: numpy.float32 or numpy.float64. Default numpy.float32.
-        :param do_poynting: If true, enables calculation of the poynting vector, S.
+            pmls: List of dicts with keys:
+                'axis': One of 'x', 'y', 'z'.
+                'direction': One of 'n', 'p'.
+                'thickness': Number of layers, default 8.
+                'epsilon_eff': Effective epsilon to match to. Default 1.0.
+                'mu_eff': Effective mu to match to. Default 1.0.
+                'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6.
+                'm': Polynomial grading exponent. Default 3.5.
+                'ma': Exponent for alpha. Default 1.
+            bloch_boundaries: List of dicts with keys:
+                'axis': One of 'x', 'y', 'z'.
+                'real': Real part of bloch phase factor (i.e. real(exp(i * phase)))
+                'imag': Imaginary part of bloch phase factor (i.e. imag(exp(i * phase)))
+            dt: Time step. Default is min(dxes) * .99/sqrt(3).
+            initial_fields: Dict with optional keys ('E', 'H', 'F', 'G') containing initial values for the
+                specified fields (default is 0 everywhere). Fields have same format as epsilon.
+            context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
+            queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
+            float_type: numpy.float32 or numpy.float64. Default numpy.float32.
+            do_poynting: If True, enables calculation of the poynting vector, S.
                Poynting vector calculation adds the following computational burdens:
-                    * During update_H, ~6 extra additions/cell are performed in order to spatially
-                        average E and temporally average H. These quantities are multiplied
-                        (6 multiplications/cell) and then stored (6 writes/cell, cache-friendly).
-                    * update_S performs a discrete cross product using the precalculated products
-                        from update_H. This is not nice to the cache and similar to e.g. update_E
-                        in complexity.
-                    * GPU memory requirements are approximately doubled, since S and the intermediate
-                        products must be stored.
+                * During update_H, 12 extra additions/cell are performed in order to temporally
+                    sum E and H. The results are then multiplied by E (6 multiplications/cell) and
+                    then stored (6 writes/cell, cache-friendly). The E-field components are
+                    reused from the H-field update and do not require additional H
+                * GPU memory requirements increase by 50% (for storing S)
        """
        if initial_fields is None:
            initial_fields = {}
@ -133,22 +140,26 @@ class Simulation(object):
        if dxes is None:
            dxes = 1.0

-        if isinstance(dxes, (float, int)):
+        if isinstance(dxes, float | int):
            uniform_dx = dxes
            min_dx = dxes
+        elif all((dxes[0][0][0] == dx).all() for dx in dxes[0] + dxes[1]):
+            uniform_dx = dxes[0][0][0]
+            min_dx = dxes[0][0][0]
        else:
            uniform_dx = False
-            self.inv_dxes = [self._create_field(1 / dxn) for dxn in dxes[0] + dxes[1]]
+            self.inv_dxes = self._create_inv_dxes(dxes)
            min_dx = min(min(dxn) for dxn in dxes[0] + dxes[1])

        max_dt = min_dx * .99 / numpy.sqrt(3)
+        logger.info(f'{min_dx=}, {max_dt=}, {dt=}')

        if dt is None:
            self.dt = max_dt
        elif dt > max_dt:
-            warnings.warn('Warning: unstable dt: {}'.format(dt))
+            warnings.warn(f'Warning: unstable dt: {dt}', stacklevel=2)
        elif dt <= 0:
-            raise Exception('Invalid dt: {}'.format(dt))
+            raise FDTDError(f'Invalid dt: {dt}')
        else:
            self.dt = dt

@ -170,42 +181,46 @@ class Simulation(object):
        ctype = type_to_C(self.arg_type)

        def ptr(arg: str) -> str:
-            return ctype + ' * restrict ' + arg
+            return ctype + ' *' + arg

-        base_fields = OrderedDict()
-        base_fields[ptr('E')] = self.E
-        base_fields[ptr('H')] = self.H
-        base_fields[ctype + ' dt'] = self.dt
-        if uniform_dx == False:
+        base_fields = {
+            ptr('E'): self.E,
+            ptr('H'): self.H,
+            ctype + ' dt': self.dt,
+            }
+        if uniform_dx is False:
            inv_dx_names = ['inv_d' + eh + r for eh in 'eh' for r in 'xyz']
-            for name, field in zip(inv_dx_names, self.inv_dxes):
+            for name, field in zip(inv_dx_names, self.inv_dxes, strict=True):
                base_fields[ptr(name)] = field

-        eps_field = OrderedDict()
-        eps_field[ptr('eps')] = self.eps
+        eps_field = {ptr('eps'): self.eps}

        if bloch_boundaries:
-            base_fields[ptr('F')] = self.F
-            base_fields[ptr('G')] = self.G
+            base_fields |= {
+                ptr('F'): self.F,
+                ptr('G'): self.G,
+                }

-            bloch_fields = OrderedDict()
-            bloch_fields[ptr('E')] = self.F
-            bloch_fields[ptr('H')] = self.G
-            bloch_fields[ctype + ' dt'] = self.dt
-            bloch_fields[ptr('F')] = self.E
-            bloch_fields[ptr('G')] = self.H
+            bloch_fields = {
+                ptr('E'): self.F,
+                ptr('H'): self.G,
+                ctype + ' dt': self.dt,
+                ptr('F'): self.E,
+                ptr('G'): self.H,
+                }

        common_source = jinja_env.get_template('common.cl').render(
-                ftype=ctype,
-                shape=self.shape,
-                )
+            ftype=ctype,
+            shape=self.shape,
+            )
        jinja_args = {
-                'common_header': common_source,
-                'pmls': pmls,
-                'do_poynting': do_poynting,
-                'bloch': bloch_boundaries,
-                'uniform_dx': uniform_dx,
-                }
+            'common_header': common_source,
+            'pmls': pmls,
+            'do_poynting': do_poynting,
+            'do_poynting_halves': do_poynting_halves,
+            'bloch': bloch_boundaries,
+            'uniform_dx': uniform_dx,
+            }
        E_source = jinja_env.get_template('update_e.cl').render(**jinja_args)
        H_source = jinja_env.get_template('update_h.cl').render(**jinja_args)
        self.sources['E'] = E_source
@ -214,27 +229,28 @@ class Simulation(object):
        if bloch_boundaries:
            bloch_args = jinja_args.copy()
            bloch_args['do_poynting'] = False
-            bloch_args['bloch'] = [{'axis': b['axis'],
-                                    'real': b['imag'],
-                                    'imag': b['real']}
-                                   for b in bloch_boundaries]
+            bloch_args['bloch'] = [{
+                    'axis': b['axis'],
+                    'real': b['imag'],
+                    'imag': b['real'],
+                    }
+                for b in bloch_boundaries]
            F_source = jinja_env.get_template('update_e.cl').render(**bloch_args)
            G_source = jinja_env.get_template('update_h.cl').render(**bloch_args)
            self.sources['F'] = F_source
            self.sources['G'] = G_source

-
-        S_fields = OrderedDict()
+        S_fields = {}
        if do_poynting:
-            S_source = jinja_env.get_template('update_s.cl').render(**jinja_args)
-            self.sources['S'] = S_source
-
-            self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=self.arg_type)
            self.S = pyopencl.array.zeros_like(self.E)
-            S_fields[ptr('oS')] = self.oS
            S_fields[ptr('S')] = self.S
+        if do_poynting_halves:
+            self.S0 = pyopencl.array.zeros_like(self.E)
+            self.S1 = pyopencl.array.zeros_like(self.E)
+            S_fields[ptr('S0')] = self.S0
+            S_fields[ptr('S1')] = self.S1

-        J_fields = OrderedDict()
+        J_fields = {}
        if do_fieldsrc:
            J_source = jinja_env.get_template('update_j.cl').render(**jinja_args)
            self.sources['J'] = J_source
@ -244,40 +260,36 @@ class Simulation(object):
            J_fields[ptr('Jr')] = self.Jr
            J_fields[ptr('Ji')] = self.Ji

-
-        '''
-        PML
-        '''
+        #
+        # PML
+        #
        pml_e_fields, pml_h_fields = self._create_pmls(pmls)
        if bloch_boundaries:
            pml_f_fields, pml_g_fields = self._create_pmls(pmls)

-        '''
-        Create operations
-        '''
+        #
+        # Create operations
+        #
        self.update_E = self._create_operation(E_source, (base_fields, eps_field, pml_e_fields))
        self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields))
-        if do_poynting:
-            self.update_S = self._create_operation(S_source, (base_fields, S_fields))
        if bloch_boundaries:
            self.update_F = self._create_operation(F_source, (bloch_fields, eps_field, pml_f_fields))
            self.update_G = self._create_operation(G_source, (bloch_fields, pml_g_fields))
        if do_fieldsrc:
-            args = OrderedDict()
-            [args.update(d) for d in (base_fields, J_fields)]
+            args = base_fields | J_fields
            var_args = [ctype + ' ' + v for v in 'cs'] + ['uint ' + r + m for r in 'xyz' for m in ('min', 'max')]
            update = ElementwiseKernel(self.context, operation=J_source,
                                       arguments=', '.join(list(args.keys()) + var_args))
            self.update_J = lambda e, *a: update(*args.values(), *a, wait_for=e)

-
-    def _create_pmls(self, pmls):
+    def _create_pmls(self, pmls: Sequence[dict[str, float]]) -> tuple[dict[str, pyopencl.array.Array], ...]:
        ctype = type_to_C(self.arg_type)
+
        def ptr(arg: str) -> str:
            return ctype + ' *' + arg

-        pml_e_fields = OrderedDict()
-        pml_h_fields = OrderedDict()
+        pml_e_fields = {}
+        pml_h_fields = {}
        for pml in pmls:
            a = 'xyz'.find(pml['axis'])

@ -285,7 +297,9 @@ class Simulation(object):
            kappa_max = numpy.sqrt(pml['mu_eff'] * pml['epsilon_eff'])
            alpha_max = pml['cfs_alpha']

-            def par(x):
+            print(sigma_max, kappa_max, alpha_max, pml['thickness'], self.dt)
+
+            def par(x, pml=pml, sigma_max=sigma_max, kappa_max=kappa_max, alpha_max=alpha_max):     # noqa: ANN001, ANN202
                scaling = (x / pml['thickness']) ** pml['m']
                sigma = scaling * sigma_max
                kappa = 1 + scaling * (kappa_max - 1)
@ -301,8 +315,13 @@ class Simulation(object):
            elif pml['polarity'] == 'n':
                xh -= 0.5

+            logger.debug(f'{pml=}')
+            logger.debug(f'{xe=}')
+            logger.debug(f'{xh=}')
+            logger.debug(f'{par(xe)=}')
+            logger.debug(f'{par(xh)=}')
            pml_p_names = [['p' + pml['axis'] + i + eh + pml['polarity'] for i in '012'] for eh in 'eh']
-            for name_e, name_h, pe, ph in zip(pml_p_names[0], pml_p_names[1], par(xe), par(xh)):
+            for name_e, name_h, pe, ph in zip(pml_p_names[0], pml_p_names[1], par(xe), par(xh), strict=True):
                pml_e_fields[ptr(name_e)] = pyopencl.array.to_device(self.queue, pe)
                pml_h_fields[ptr(name_h)] = pyopencl.array.to_device(self.queue, ph)

@ -313,64 +332,98 @@ class Simulation(object):
            psi_shape = list(self.shape)
            psi_shape[a] = pml['thickness']

-            for ne, nh in zip(*psi_names):
+            for ne, nh in zip(*psi_names, strict=True):
                pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
                pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
        return pml_e_fields, pml_h_fields

-    def _create_operation(self, source, args_fields):
-        args = OrderedDict()
-        [args.update(d) for d in args_fields]
-        update = ElementwiseKernel(self.context, operation=source,
-                                   arguments=', '.join(args.keys()))
+    def _create_operation(self, source: str, args_fields: Sequence[dict[str, pyopencl.array.Array]]) -> Callable[..., pyopencl.Event]:
+        args = {}
+        for d in args_fields:
+            args.update(d)
+        update = ElementwiseKernel(
+            self.context,
+            operation=source,
+            arguments=', '.join(args.keys()),
+            )
        return lambda e: update(*args.values(), wait_for=e)

-    def _create_context(self, context: pyopencl.Context = None,
-                        queue: pyopencl.CommandQueue = None):
-        if context is None:
-            self.context = pyopencl.create_some_context()
-        else:
-            self.context = context
+    def _create_context(
+            self,
+            context: pyopencl.Context | None = None,
+            queue: pyopencl.CommandQueue | None = None,
+            ) -> None:
+        self.context = context or pyopencl.create_some_context()
+        self.queue = queue or pyopencl.CommandQueue(self.context)

-        if queue is None:
-            self.queue = pyopencl.CommandQueue(self.context)
-        else:
-            self.queue = queue
-
-    def _create_eps(self, epsilon: List[numpy.ndarray]):
+    def _create_eps(self, epsilon: NDArray) -> pyopencl.array.Array:
        if len(epsilon) != 3:
-            raise Exception('Epsilon must be a list with length of 3')
-        if not all((e.shape == epsilon[0].shape for e in epsilon[1:])):
-            raise Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
+            raise FDTDError('Epsilon must be a list with length of 3')
+        if not all(e.shape == epsilon[0].shape for e in epsilon[1:]):
+            raise FDTDError('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
        if not epsilon[0].shape == self.shape:
-            raise Exception('Epsilon shape mismatch. Expected {}, got {}'.format(self.shape, epsilon[0].shape))
+            raise FDTDError(f'Epsilon shape mismatch. Expected {self.shape}, got {epsilon[0].shape}')
        self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(self.arg_type))

-    def _create_field(self, initial_value: List[numpy.ndarray] = None):
+    def _create_field(self, initial_value: NDArray | None = None) -> pyopencl.array.Array:
        if initial_value is None:
            return pyopencl.array.zeros_like(self.eps)
-        else:
-            if len(initial_value) != 3:
-                Exception('Initial field value must be a list of length 3')
-            if not all((f.shape == self.shape for f in initial_value)):
-                Exception('Initial field list elements must have same shape as epsilon elements')
-            return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type))
+        if len(initial_value) != 3:
+            raise FDTDError('Initial field value must be a list of length 3')
+        if not all(f.shape == self.shape for f in initial_value):
+            raise FDTDError('Initial field list elements must have same shape as epsilon elements')
+        return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type))
+
+    def _create_inv_dxes(self, dxes: list[list[NDArray]]) -> pyopencl.array.Array:
+        if len(dxes[0]) or len(dxes[1]) != 3:
+            raise FDTDError('dxes must be two lists of 3 1D ndarrays each')
+        if self.shape != tuple(len(dx) for dx in dxes[0]):
+            raise FDTDError('dxes must be compatible with shape of epsilon')
+        if self.shape != tuple(len(dx) for dx in dxes[1]):
+            raise FDTDError('dxes must be compatible with shape of epsilon')
+        self.inv_dxes = [
+            pyopencl.array.to_device(self.queue, (1 / dxn).astype(self.arg_type))
+            for dxn in chain(dxes[0], dxes[1])]


-def type_to_C(float_type) -> str:
+def type_to_C(
+        float_type: type,
+        ) -> str:
    """
    Returns a string corresponding to the C equivalent of a numpy type.
    Only works for float16, float32, float64.

-    :param float_type: e.g. numpy.float32
-    :return: string containing the corresponding C type (eg. 'double')
+    Args:
+        float_type: e.g. numpy.float32
+
+    Returns:
+        string containing the corresponding C type (eg. 'double')
    """
-    if float_type == numpy.float16:
-        arg_type = 'half'
-    elif float_type == numpy.float32:
-        arg_type = 'float'
-    elif float_type == numpy.float64:
-        arg_type = 'double'
-    else:
-        raise Exception('Unsupported type')
-    return arg_type
+    types = {
+        numpy.float16: 'half',
+        numpy.float32: 'float',
+        numpy.float64: 'double',
+        numpy.complex64: 'cfloat_t',
+        numpy.complex128: 'cdouble_t',
+    }
+    if float_type not in types:
+        raise FDTDError(f'Unsupported type: {float_type}')
+
+    return types[float_type]
+
+#            def par(x):
+#                scaling = ((x / (pml['thickness'])) ** pml['m'])
+#                print('scaling', scaling)
+#                print('sigma_max * dt / 2', sigma_max * self.dt / 2)
+#                print('kappa_max', kappa_max)
+#                print('m', pml['m'])
+#                sigma = scaling * sigma_max
+#                kappa = 1 + scaling * (kappa_max - 1)
+#                alpha = ((1 - x / pml['thickness']) ** pml['ma']) * alpha_max
+#                p0 = 1/(1 + self.dt * (alpha + sigma / kappa))
+#                p1 = self.dt * sigma / kappa * p0
+#                p2 = 1/kappa
+#                print(p0.min(), p0.max(), p1.min(), p1.max())
+#                return p0, p1, p2
+#
+#
--- a/opencl_fdtd/test_simulation.py
+++ b/opencl_fdtd/test_simulation.py
@ -0,0 +1,382 @@
+import unittest
+import numpy
+
+from opencl_fdtd import Simulation
+from meanas import fdtd
+
+
+class BasicTests():
+    def test_initial_fields(self):
+        # Make sure initial fields didn't change
+        e0 = self.es[0]
+        h0 = self.hs[0]
+        mask = self.src_mask
+
+        self.assertEqual(e0[mask], self.j_mag / self.epsilon[mask])
+        self.assertFalse(e0[~mask].any())
+        self.assertFalse(h0.any())
+
+
+    def test_initial_energy(self):
+        e0 = self.es[0]
+        h0 = self.hs[0]
+        h1 = self.hs[1]
+        mask = self.src_mask[1]
+        dxes = self.dxes if self.dxes is not None else tuple(tuple(numpy.ones(s) for s in e0.shape[1:]) for _ in range(2))
+        dV = numpy.prod(numpy.meshgrid(*dxes[0], indexing='ij'), axis=0)
+        u0 = self.j_mag * self.j_mag / self.epsilon[self.src_mask] * dV[mask]
+        args = {
+            'dxes': self.dxes,
+            'epsilon': self.epsilon,
+            }
+
+        # Make sure initial energy and E dot J are correct
+        energy0 = fdtd.energy_estep(h0=h0, e1=e0, h2=self.hs[1], **args)
+        e_dot_j_0 = fdtd.delta_energy_j(j0=(e0 - 0) * self.epsilon, e1=e0, dxes=self.dxes)
+        self.assertTrue(numpy.allclose(energy0[mask], u0))
+        self.assertFalse(energy0[~mask].any(), msg=f'{energy0=}')
+        self.assertTrue(numpy.allclose(e_dot_j_0[mask], u0))
+        self.assertFalse(e_dot_j_0[~mask].any(), msg=f'{e_dot_j_0=}')
+
+
+    def test_energy_conservation(self):
+        e0 = self.es[0]
+        u0 = fdtd.delta_energy_j(j0=(e0 - 0) * self.epsilon, e1=e0, dxes=self.dxes).sum()
+        args = {'dxes': self.dxes,
+                'epsilon': self.epsilon}
+
+        for ii in range(1, 8):
+            with self.subTest(i=ii):
+                u_hstep = fdtd.energy_hstep(e0=self.es[ii-1], h1=self.hs[ii], e2=self.es[ii], **args)
+                u_estep = fdtd.energy_estep(h0=self.hs[ii], e1=self.es[ii], h2=self.hs[ii + 1], **args)
+                self.assertTrue(numpy.allclose(u_hstep.sum(), u0), msg=f'u_hstep: {u_hstep.sum()}\n{numpy.moveaxis(u_hstep, -1, 0)}')
+                self.assertTrue(numpy.allclose(u_estep.sum(), u0), msg=f'u_estep: {u_estep.sum()}\n{numpy.moveaxis(u_estep, -1, 0)}')
+
+
+    def test_poynting(self):
+        for ii in range(1, 3):
+            with self.subTest(i=ii):
+                s = fdtd.poynting(e=self.es[ii], h=self.hs[ii+1] + self.hs[ii])
+                sf = numpy.moveaxis(s, -1, 0)
+                ss = numpy.moveaxis(self.ss[ii], -1, 0)
+                self.assertTrue(numpy.allclose(s, self.ss[ii], rtol=1e-4),
+                                msg=f'From ExH:\n{sf}\nFrom sim.S:\n{ss}')
+
+
+    def test_poynting_divergence(self):
+        args = {
+            'dxes': self.dxes,
+            'epsilon': self.epsilon,
+            }
+
+        dxes = self.dxes if self.dxes is not None else tuple(tuple(numpy.ones(s) for s in self.epsilon.shape[1:]) for _ in range(2))
+        dV = numpy.prod(numpy.meshgrid(*dxes[0], indexing='ij'), axis=0)
+
+        u_eprev = None
+        for ii in range(1, 8):
+            with self.subTest(i=ii):
+                u_hstep = fdtd.energy_hstep(e0=self.es[ii-1], h1=self.hs[ii], e2=self.es[ii], **args)
+                u_estep = fdtd.energy_estep(h0=self.hs[ii], e1=self.es[ii], h2=self.hs[ii + 1], **args)
+
+                du_half_h2e = u_estep - u_hstep
+                div_s_h2e = self.dt * fdtd.poynting_divergence(e=self.es[ii], h=self.hs[ii], dxes=self.dxes) * dV
+
+                du_half_h2e_f = numpy.moveaxis(du_half_h2e, -1, 0)
+                div_s_h2e_f = -numpy.moveaxis(div_s_h2e, -1, 0)
+                self.assertTrue(numpy.allclose(du_half_h2e, -div_s_h2e, rtol=1e-4),
+                                msg=f'du_half_h2e\n{du_half_h2e_f}\ndiv_s_h2e\n{div_s_h2e_f}')
+
+                if u_eprev is None:
+                    u_eprev = u_estep
+                    continue
+
+                # previous half-step
+                du_half_e2h = u_hstep - u_eprev
+                div_s_e2h = self.dt * fdtd.poynting_divergence(e=self.es[ii-1], h=self.hs[ii], dxes=self.dxes) * dV
+                du_half_e2h_f = numpy.moveaxis(du_half_e2h, -1, 0)
+                div_s_e2h_f = -numpy.moveaxis(div_s_e2h, -1, 0)
+                self.assertTrue(numpy.allclose(du_half_e2h, -div_s_e2h, rtol=1e-4),
+                                msg=f'du_half_e2h\n{du_half_e2h_f}\ndiv_s_e2h\n{div_s_e2h_f}')
+                u_eprev = u_estep
+
+
+    def test_poynting_planes(self):
+        args = {
+            'dxes': self.dxes,
+            'epsilon': self.epsilon,
+            }
+        dxes = self.dxes if self.dxes is not None else tuple(tuple(numpy.ones(s) for s in self.epsilon.shape[1:]) for _ in range(2))
+        dV = numpy.prod(numpy.meshgrid(*dxes[0], indexing='ij'), axis=0)
+
+        mx = numpy.roll(self.src_mask, (-1, -1), axis=(0, 1))
+        my = numpy.roll(self.src_mask, -1, axis=2)
+        mz = numpy.roll(self.src_mask, (+1, -1), axis=(0, 3))
+        px = numpy.roll(self.src_mask, -1, axis=0)
+        py = self.src_mask.copy()
+        pz = numpy.roll(self.src_mask, +1, axis=0)
+
+        u_eprev = None
+        for ii in range(1, 8):
+            with self.subTest(i=ii):
+                u_hstep = fdtd.energy_hstep(e0=self.es[ii-1], h1=self.hs[ii], e2=self.es[ii], **args)
+                u_estep = fdtd.energy_estep(h0=self.hs[ii], e1=self.es[ii], h2=self.hs[ii + 1], **args)
+
+                s_h2e = -fdtd.poynting(e=self.es[ii], h=self.hs[ii]) * self.dt
+                s_h2e[0] *= dxes[0][1][None, :, None] * dxes[0][2][None, None, :]
+                s_h2e[1] *= dxes[0][0][:, None, None] * dxes[0][2][None, None, :]
+                s_h2e[2] *= dxes[0][0][:, None, None] * dxes[0][1][None, :, None]
+                planes = [s_h2e[px].sum(), -s_h2e[mx].sum(),
+                          s_h2e[py].sum(), -s_h2e[my].sum(),
+                          s_h2e[pz].sum(), -s_h2e[mz].sum()]
+                du = (u_estep - u_hstep)[self.src_mask[1]]
+                self.assertTrue(numpy.allclose(sum(planes), (u_estep - u_hstep)[self.src_mask[1]]),
+                                msg=f'planes: {planes} (sum: {sum(planes)})\n du:\n {du}')
+
+                if u_eprev is None:
+                    u_eprev = u_estep
+                    continue
+
+                s_e2h = -fdtd.poynting(e=self.es[ii - 1], h=self.hs[ii]) * self.dt
+                s_e2h[0] *= dxes[0][1][None, :, None] * dxes[0][2][None, None, :]
+                s_e2h[1] *= dxes[0][0][:, None, None] * dxes[0][2][None, None, :]
+                s_e2h[2] *= dxes[0][0][:, None, None] * dxes[0][1][None, :, None]
+                planes = [s_e2h[px].sum(), -s_e2h[mx].sum(),
+                          s_e2h[py].sum(), -s_e2h[my].sum(),
+                          s_e2h[pz].sum(), -s_e2h[mz].sum()]
+                du = (u_hstep - u_eprev)[self.src_mask[1]]
+                self.assertTrue(numpy.allclose(sum(planes), (u_hstep - u_eprev)[self.src_mask[1]]),
+                                msg=f'planes: {du} (sum: {sum(planes)})\n du:\n {du}')
+
+                # previous half-step
+                u_eprev = u_estep
+
+
+class Basic2DNoDXOnlyVacuum(unittest.TestCase, BasicTests):
+    def setUp(self):
+        shape = [3, 5, 5, 1]
+        self.dt = 0.5
+        self.epsilon = numpy.ones(shape, dtype=float)
+        self.j_mag = 32
+        self.dxes = None
+
+        self.src_mask = numpy.zeros_like(self.epsilon, dtype=bool)
+        self.src_mask[1, 2, 2, 0] = True
+
+        e = numpy.zeros_like(self.epsilon)
+        h = numpy.zeros_like(self.epsilon)
+        e[self.src_mask] = self.j_mag / self.epsilon[self.src_mask]
+        self.es = [e]
+        self.hs = [h]
+        self.ss = []
+
+        sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
+                         initial_fields={'E': e, 'H': h}, do_poynting=True)
+
+        for _ in range(9):
+            e = e.copy()
+            h = h.copy()
+            sim.update_H([]).wait()
+            sim.update_E([]).wait()
+            self.es.append(sim.E.get().reshape(shape))
+            self.hs.append(sim.H.get().reshape(shape))
+            self.ss.append(sim.S.get().reshape(shape))
+
+
+class Basic2DUniformDX3(unittest.TestCase, BasicTests):
+    def setUp(self):
+        shape = [3, 5, 1, 5]
+        self.dt = 0.5
+        self.j_mag = 32
+        self.dxes = tuple(tuple(numpy.full(s, 2.0) for s in shape[1:]) for _ in range(2))
+
+        self.src_mask = numpy.zeros(shape, dtype=bool)
+        self.src_mask[1, 2, 0, 2] = True
+
+        self.epsilon = numpy.full(shape, 1, dtype=float)
+        self.epsilon[self.src_mask] = 2
+
+        e = numpy.zeros_like(self.epsilon)
+        h = numpy.zeros_like(self.epsilon)
+        e[self.src_mask] = self.j_mag / self.epsilon[self.src_mask]
+        self.es = [e]
+        self.hs = [h]
+        self.ss = []
+
+        sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
+                         initial_fields={'E': e, 'H': h}, do_poynting=True)
+
+        for _ in range(9):
+            e = e.copy()
+            h = h.copy()
+            sim.update_H([]).wait()
+            sim.update_E([]).wait()
+            self.es.append(sim.E.get().reshape(shape))
+            self.hs.append(sim.H.get().reshape(shape))
+            self.ss.append(sim.S.get().reshape(shape))
+
+
+class Basic3DUniformDXOnlyVacuum(unittest.TestCase, BasicTests):
+    def setUp(self):
+        shape = [3, 5, 5, 5]
+        self.dt = 0.5
+        self.epsilon = numpy.ones(shape, dtype=float)
+        self.j_mag = 32
+        self.dxes = tuple(tuple(numpy.ones(s) for s in shape[1:]) for _ in range(2))
+
+        self.src_mask = numpy.zeros_like(self.epsilon, dtype=bool)
+        self.src_mask[1, 2, 2, 2] = True
+
+        e = numpy.zeros_like(self.epsilon)
+        h = numpy.zeros_like(self.epsilon)
+        e[self.src_mask] = self.j_mag / self.epsilon[self.src_mask]
+        self.es = [e]
+        self.hs = [h]
+        self.ss = []
+
+        sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
+                         initial_fields={'E': e, 'H': h}, do_poynting=True)
+
+        for _ in range(9):
+            e = e.copy()
+            h = h.copy()
+            sim.update_H([]).wait()
+            sim.update_E([]).wait()
+            self.es.append(sim.E.get().reshape(shape))
+            self.hs.append(sim.H.get().reshape(shape))
+            self.ss.append(sim.S.get().reshape(shape))
+
+
+class Basic3DUniformDXUniformN(unittest.TestCase, BasicTests):
+    def setUp(self):
+        shape = [3, 5, 5, 5]
+        self.dt = 0.5
+        self.epsilon = numpy.full(shape, 2.5, dtype=float)
+        self.j_mag = 32
+        self.dxes = tuple(tuple(numpy.ones(s) for s in shape[1:]) for _ in range(2))
+
+        self.src_mask = numpy.zeros_like(self.epsilon, dtype=bool)
+        self.src_mask[1, 2, 2, 2] = True
+
+        e = numpy.zeros_like(self.epsilon)
+        h = numpy.zeros_like(self.epsilon)
+        e[self.src_mask] = self.j_mag / self.epsilon[self.src_mask]
+        self.es = [e]
+        self.hs = [h]
+        self.ss = []
+
+        sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
+                         initial_fields={'E': e, 'H': h}, do_poynting=True)
+
+        for _ in range(9):
+            e = e.copy()
+            h = h.copy()
+            sim.update_H([]).wait()
+            sim.update_E([]).wait()
+            self.es.append(sim.E.get().reshape(shape))
+            self.hs.append(sim.H.get().reshape(shape))
+            self.ss.append(sim.S.get().reshape(shape))
+
+
+class Basic3DUniformDX(unittest.TestCase, BasicTests):
+    def setUp(self):
+        shape = [3, 5, 5, 5]
+        self.dt = 0.33
+        self.j_mag = 32
+        self.dxes = tuple(tuple(numpy.ones(s) for s in shape[1:]) for _ in range(2))
+
+        self.src_mask = numpy.zeros(shape, dtype=bool)
+        self.src_mask[1, 2, 2, 2] = True
+
+        self.epsilon = numpy.full(shape, 1, dtype=float)
+        self.epsilon[self.src_mask] = 2
+
+        e = numpy.zeros_like(self.epsilon)
+        h = numpy.zeros_like(self.epsilon)
+        e[self.src_mask] = self.j_mag / self.epsilon[self.src_mask]
+        self.es = [e]
+        self.hs = [h]
+        self.ss = []
+
+        sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
+                         initial_fields={'E': e, 'H': h}, do_poynting=True)
+
+        for _ in range(9):
+            e = e.copy()
+            h = h.copy()
+            sim.update_H([]).wait()
+            sim.update_E([]).wait()
+            self.es.append(sim.E.get().reshape(shape))
+            self.hs.append(sim.H.get().reshape(shape))
+            self.ss.append(sim.S.get().reshape(shape))
+
+
+class JdotE_3DUniformDX(unittest.TestCase):
+    def setUp(self):
+        shape = [3, 5, 5, 5]
+        self.dt = 0.5
+        self.j_mag = 32
+        self.dxes = tuple(tuple(numpy.full(s, 2.0) for s in shape[1:]) for _ in range(2))
+
+        self.src_mask = numpy.zeros(shape, dtype=bool)
+        self.src_mask[1, 2, 2, 2] = True
+
+        self.epsilon = numpy.full(shape, 4, dtype=float)
+        self.epsilon[self.src_mask] = 2
+
+        rng = numpy.random.default_rng()
+        e = rng.integers(-128, 128 + 1, size=shape, dtype=float)
+        h = rng.integers(-128, 128 + 1, size=shape, dtype=float)
+        self.es = [e]
+        self.hs = [h]
+        self.ss = []
+
+        sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
+                         initial_fields={'E': e, 'H': h}, do_poynting=True, float_type=numpy.float64)
+
+        eh2h = fdtd.maxwell_h(dt=self.dt, dxes=self.dxes)
+        eh2e = fdtd.maxwell_e(dt=self.dt, dxes=self.dxes)
+        for ii in range(9):
+            e = e.copy()
+            h = h.copy()
+            sim.update_H([]).wait()
+            sim.update_E([]).wait()
+
+            if ii == 1:
+                nn = numpy.where(self.src_mask.flat)[0][0]
+                self.e_before = sim.E.get().reshape(shape)
+                sim.E[nn] += self.j_mag / self.epsilon[self.src_mask][0]
+                self.e_after = sim.E.get().reshape(shape)
+                self.j_dot_e = self.j_mag * e[self.src_mask]
+
+            self.hs.append(sim.H.get().reshape(shape))
+            self.es.append(sim.E.get().reshape(shape))
+            self.ss.append(sim.S.get().reshape(shape))
+
+
+    def test_j_dot_e(self):
+        h0 = self.hs[2]
+        e0 = self.es[1]
+        e1 = self.es[2]
+        j0 = numpy.zeros_like(e0)
+        j0[self.src_mask] = self.j_mag
+        args = {
+            'dxes': self.dxes,
+            'epsilon': self.epsilon,
+            }
+        e2h = fdtd.maxwell_h(dt=self.dt, dxes=self.dxes)
+
+        #ee = j0 * (2 * e0 - j0)
+        #hh = h0 * e2h(j0, numpy.zeros_like(h0))
+        #u0 = fdtd.dxmul(ee, hh, **args)
+        u0 = fdtd.delta_energy_j(j0=j0, e1=(e0 + e1), dxes=self.dxes)
+
+        #uh = [fdtd.energy_hstep(e0=self.es[ii-1], h1=self.hs[ii], e2=self.es[ii], **args) for ii in range(1, 9)]
+        #ue = [fdtd.energy_estep(h0=self.hs[ii], e1=self.es[ii], h2=self.hs[ii + 1], **args) for ii in range(8)]
+        #uht = [uu.sum() for uu in uh]
+        #uet = [uu.sum() for uu in ue]
+
+        u_hstep = fdtd.energy_hstep(e0=self.es[0], h1=self.hs[1], e2=self.es[1], **args)
+        u_estep = fdtd.energy_estep(h0=self.hs[-2], e1=self.es[-2], h2=self.hs[-1], **args)
+        #breakpoint()
+        du = (u_estep - u_hstep).sum()
+        self.assertTrue(numpy.allclose(u0.sum(), (u_estep - u_hstep).sum()), msg=f'{u0.sum()} != {du}')
--- a/pcgen.py
+++ b/pcgen.py
@ -1,213 +0,0 @@
-"""
-Routines for creating normalized 2D lattices and common photonic crystal
- cavity designs.
-"""
-
-from typing import List
-
-import numpy
-
-
-def triangular_lattice(dims: List[int],
-                       asymmetrical: bool=False
-                       ) -> numpy.ndarray:
-    """
-    Return an ndarray of [[x0, y0], [x1, y1], ...] denoting lattice sites for
-     a triangular lattice in 2D. The lattice will be centered around (0, 0),
-     unless asymmetrical=True in which case there will be extra holes in the +x
-     direction.
-
-    :param dims: Number of lattice sites in the [x, y] directions.
-    :param asymmetrical: If true, each row in x will contain the same number of
-            lattice sites. If false, the structure is symmetrical around (0, 0).
-    :return: [[x0, y0], [x1, 1], ...] denoting lattice sites.
-    """
-    dims = numpy.array(dims, dtype=int)
-
-    if asymmetrical:
-        k = 0
-    else:
-        k = 1
-
-    positions = []
-    ymax = (dims[1] - 1)/2
-    for j in numpy.linspace(-ymax, ymax, dims[0]):
-        j_odd = numpy.floor(j) % 2
-
-        x_offset = j_odd * 0.5
-        y_offset = j * numpy.sqrt(3)/2
-
-        num_x = dims[0] - k * j_odd
-        xmax = (dims[0] - 1)/2
-        xs = numpy.linspace(-xmax, xmax - k * j_odd, num_x) + x_offset
-        ys = numpy.full_like(xs, y_offset)
-
-        positions += [numpy.vstack((xs, ys)).T]
-
-    xy = numpy.vstack(tuple(positions))
-    return xy[xy[:, 0].argsort(), ]
-
-
-def square_lattice(dims: List[int]) -> numpy.ndarray:
-    """
-    Return an ndarray of [[x0, y0], [x1, y1], ...] denoting lattice sites for
-     a square lattice in 2D. The lattice will be centered around (0, 0).
-
-    :param dims: Number of lattice sites in the [x, y] directions.
-    :return: [[x0, y0], [x1, 1], ...] denoting lattice sites.
-    """
-    xs, ys = numpy.meshgrid(range(dims[0]), range(dims[1]), 'xy')
-    xs -= dims[0]/2
-    ys -= dims[1]/2
-    xy = numpy.vstack((xs.flatten(), ys.flatten())).T
-    return xy[xy[:, 0].argsort(), ]
-
-# ### Photonic crystal functions ###
-
-
-def nanobeam_holes(a_defect: float,
-                   num_defect_holes: int,
-                   num_mirror_holes: int
-                   ) -> numpy.ndarray:
-    """
-    Returns a list of [[x0, r0], [x1, r1], ...] of nanobeam hole positions and radii.
-     Creates a region in which the lattice constant and radius are progressively
-     (linearly) altered over num_defect_holes holes until they reach the value
-     specified by a_defect, then symmetrically returned to a lattice constant and
-     radius of 1, which is repeated num_mirror_holes times on each side.
-
-    :param a_defect: Minimum lattice constant for the defect, as a fraction of the
-            mirror lattice constant (ie., for no defect, a_defect = 1).
-    :param num_defect_holes: How many holes form the defect (per-side)
-    :param num_mirror_holes: How many holes form the mirror (per-side)
-    :return: Ndarray [[x0, r0], [x1, r1], ...] of nanobeam hole positions and radii.
-    """
-    a_values = numpy.linspace(a_defect, 1, num_defect_holes, endpoint=False)
-    xs = a_values.cumsum() - (a_values[0] / 2)  # Later mirroring makes center distance 2x as long
-    mirror_xs = numpy.arange(1, num_mirror_holes + 1) + xs[-1]
-    mirror_rs = numpy.ones_like(mirror_xs)
-    return numpy.vstack((numpy.hstack((-mirror_xs[::-1], -xs[::-1], xs, mirror_xs)),
-                         numpy.hstack((mirror_rs[::-1], a_values[::-1], a_values, mirror_rs)))).T
-
-
-def ln_defect(mirror_dims: List[int], defect_length: int) -> numpy.ndarray:
-    """
-    N-hole defect in a triangular lattice.
-
-    :param mirror_dims: [x, y] mirror lengths (number of holes). Total number of holes
-            is 2 * n + 1 in each direction.
-    :param defect_length: Length of defect. Should be an odd number.
-    :return: [[x0, y0], [x1, y1], ...] for all the holes
-    """
-    if defect_length % 2 != 1:
-        raise Exception('defect_length must be odd!')
-    p = triangular_lattice([2 * d + 1 for d in mirror_dims])
-    half_length = numpy.floor(defect_length / 2)
-    hole_nums = numpy.arange(-half_length, half_length + 1)
-    holes_to_keep = numpy.in1d(p[:, 0], hole_nums, invert=True)
-    return p[numpy.logical_or(holes_to_keep, p[:, 1] != 0), ]
-
-
-def ln_shift_defect(mirror_dims: List[int],
-                    defect_length: int,
-                    shifts_a: List[float]=(0.15, 0, 0.075),
-                    shifts_r: List[float]=(1, 1, 1)
-                    ) -> numpy.ndarray:
-    """
-    N-hole defect with shifted holes (intended to give the mode a gaussian profile
-     in real- and k-space so as to improve both Q and confinement). Holes along the
-     defect line are shifted and altered according to the shifts_* parameters.
-
-    :param mirror_dims: [x, y] mirror lengths (number of holes). Total number of holes
-            is 2 * n + 1 in each direction.
-    :param defect_length: Length of defect. Should be an odd number.
-    :param shifts_a: Percentage of a to shift (1st, 2nd, 3rd,...) holes along the defect line
-    :param shifts_r: Factor to multiply the radius by. Should match length of shifts_a
-    :return: [[x0, y0, r0], [x1, y1, r1], ...] for all the holes
-    """
-    if not hasattr(shifts_a, "__len__") and shifts_a is not None:
-        shifts_a = [shifts_a]
-    if not hasattr(shifts_r, "__len__") and shifts_r is not None:
-        shifts_r = [shifts_r]
-
-    xy = ln_defect(mirror_dims, defect_length)
-
-    # Add column for radius
-    xyr = numpy.hstack((xy, numpy.ones((xy.shape[0], 1))))
-
-    # Shift holes
-    # Expand shifts as necessary
-    n_shifted = max(len(shifts_a), len(shifts_r))
-
-    tmp_a = numpy.array(shifts_a)
-    shifts_a = numpy.ones((n_shifted, ))
-    shifts_a[:len(tmp_a)] = tmp_a
-
-    tmp_r = numpy.array(shifts_r)
-    shifts_r = numpy.ones((n_shifted, ))
-    shifts_r[:len(tmp_r)] = tmp_r
-
-    x_removed = numpy.floor(defect_length / 2)
-
-    for ind in range(n_shifted):
-        for sign in (-1, 1):
-            x_val = sign * (x_removed + ind + 1)
-            which = numpy.logical_and(xyr[:, 0] == x_val, xyr[:, 1] == 0)
-            xyr[which, ] = (x_val + numpy.sign(x_val) * shifts_a[ind], 0, shifts_r[ind])
-
-    return xyr
-
-
-def r6_defect(mirror_dims: List[int]) -> numpy.ndarray:
-    """
-    R6 defect in a triangular lattice.
-
-    :param mirror_dims: [x, y] mirror lengths (number of holes). Total number of holes
-            is 2 * n + 1 in each direction.
-    :return: [[x0, y0], [x1, y1], ...] specifying hole centers.
-    """
-    xy = triangular_lattice([2 * d + 1 for d in mirror_dims])
-
-    rem_holes_plus = numpy.array([[1, 0],
-                                  [0.5, +numpy.sqrt(3)/2],
-                                  [0.5, -numpy.sqrt(3)/2]])
-    rem_holes = numpy.vstack((rem_holes_plus, -rem_holes_plus))
-
-    for rem_xy in rem_holes:
-        xy = xy[(xy != rem_xy).any(axis=1), ]
-
-    return xy
-
-
-def l3_shift_perturbed_defect(mirror_dims: List[int],
-                              perturbed_radius: float=1.1,
-                              shifts_a: List[float]=(),
-                              shifts_r: List[float]=()
-                              ) -> numpy.ndarray:
-    """
-    3-hole defect with perturbed hole sizes intended to form an upwards-directed
-     beam. Can also include shifted holes along the defect line, intended
-     to give the mode a more gaussian profile to improve Q.
-
-    :param mirror_dims: [x, y] mirror lengths (number of holes). Total number of holes
-            is 2 * n + 1 in each direction.
-    :param perturbed_radius: Amount to perturb the radius of the holes used for beam-forming
-    :param shifts_a: Percentage of a to shift (1st, 2nd, 3rd,...) holes along the defect line
-    :param shifts_r: Factor to multiply the radius by. Should match length of shifts_a
-    :return: [[x0, y0, r0], [x1, y1, r1], ...] for all the holes
-    """
-    xyr = ln_shift_defect(mirror_dims, 3, shifts_a, shifts_r)
-
-    abs_x, abs_y = (numpy.fabs(xyr[:, i]) for i in (0, 1))
-
-    # Sorted unique xs and ys
-    # Ignore row y=0 because it might have shifted holes
-    xs = numpy.unique(abs_x[abs_x != 0])
-    ys = numpy.unique(abs_y)
-
-    # which holes should be perturbed? (xs[[3, 7]], ys[1]) and (xs[[2, 6]], ys[2])
-    perturbed_holes = ((xs[a], ys[b]) for a, b in ((3, 1), (7, 1), (2, 2), (6, 2)))
-    for row in xyr:
-        if numpy.fabs(row) in perturbed_holes:
-            row[2] = perturbed_radius
-    return xyr
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,93 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "opencl_fdtd"
+description = "OpenCL FDTD solver"
+readme = "README.md"
+license = { file = "LICENSE.md" }
+authors = [
+    { name="Jan Petykiewicz", email="jan@mpxd.net" },
+    ]
+homepage = "https://mpxd.net/code/jan/opencl_fdtd"
+repository = "https://mpxd.net/code/jan/opencl_fdtd"
+keywords = [
+    "FDTD",
+    "finite",
+    "difference",
+    "time",
+    "domain",
+    "simulation",
+    "optics",
+    "electromagnetic",
+    "dielectric",
+    ]
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Manufacturing",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: GNU Affero General Public License v3",
+    "Topic :: Scientific/Engineering",
+    ]
+requires-python = ">=3.11"
+dynamic = ["version"]
+dependencies = [
+    "numpy>=1.26",
+    "pyopencl",
+    "jinja2",
+    "meanas>=0.3",
+    ]
+
+[tool.hatch.version]
+path = "opencl_fdtd/__init__.py"
+
+
+[tool.ruff]
+exclude = [
+    ".git",
+    "dist",
+    ]
+line-length = 145
+indent-width = 4
+lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
+lint.select = [
+    "NPY", "E", "F", "W", "B", "ANN", "UP", "SLOT", "SIM", "LOG",
+    "C4", "ISC", "PIE", "PT", "RET", "TCH", "PTH", "INT",
+    "ARG", "PL", "R", "TRY",
+    "G010", "G101", "G201", "G202",
+    "Q002", "Q003", "Q004",
+    ]
+lint.ignore = [
+    #"ANN001",   # No annotation
+    "ANN002",   # *args
+    "ANN003",   # **kwargs
+    "ANN401",   # Any
+    "ANN101",   # self: Self
+    "SIM108",   # single-line if / else assignment
+    "RET504",   # x=y+z; return x
+    "PIE790",   # unnecessary pass
+    "ISC003",   # non-implicit string concatenation
+    "C408",     # dict(x=y) instead of {'x': y}
+    "PLR09",    # Too many xxx
+    "PLR2004",  # magic number
+    "PLC0414",  # import x as x
+    "TRY003",   # Long exception message
+    ]
+
+
+[[tool.mypy.overrides]]
+module = [
+    "scipy",
+    "scipy.optimize",
+    "scipy.linalg",
+    "scipy.sparse",
+    "scipy.sparse.linalg",
+    "pyopencl",
+    "pyopencl.array",
+    "pyopencl.elementwise",
+    "pyopencl.reduction",
+    ]
+ignore_missing_imports = true
--- a/setup.py
+++ b/setup.py
@ -1,30 +0,0 @@
-#!/usr/bin/env python3
-
-from setuptools import setup, find_packages
-import opencl_fdtd
-
-with open('README.md', 'r') as f:
-    long_description = f.read()
-
-setup(name='opencl_fdtd',
-      version=opencl_fdtd.version,
-      description='OpenCL FDTD solver',
-      long_description=long_description,
-      long_description_content_type='text/markdown',
-      author='Jan Petykiewicz',
-      author_email='anewusername@gmail.com',
-      url='https://mpxd.net/code/jan/opencl_fdtd',
-      packages=find_packages(),
-      package_data={
-          'opencl_fdfd': ['kernels/*']
-      },
-      install_requires=[
-            'numpy',
-            'pyopencl',
-            'jinja2',
-            'fdfd_tools>=0.3',
-      ],
-      extras_require={
-      },
-      )
-
Author	SHA1	Message	Date
Jan Petykiewicz	2f37d97c3e	autodetect uniform dxes, and fix nonuniform dxes setup	2025-01-31 20:17:15 -08:00
Jan Petykiewicz	5b16295769	update deps	2024-07-30 23:19:31 -07:00
Jan Petykiewicz	fe684d83d3	add ruff and mypy configs	2024-07-30 23:19:21 -07:00
Jan Petykiewicz	7f620a9bb3	modern rng approach	2024-07-30 23:19:07 -07:00
Jan Petykiewicz	50b30d31fb	fixes driven by ruff & mypy	2024-07-30 23:18:54 -07:00
Jan Petykiewicz	b703f1ee20	drop object superclass	2024-01-10 20:57:40 -08:00
Jan Petykiewicz	5b1e758c27	typo fix	2024-01-10 20:57:23 -08:00
Jan Petykiewicz	d0011cb1f9	type modernization	2024-01-10 20:54:12 -08:00
Jan Petykiewicz	654f9c165b	formatting	2024-01-10 20:52:52 -08:00
jan	689b3176cc	style and type fixes	2022-11-24 16:12:40 -08:00
Jan Petykiewicz	77c10feead	gitignore caches	2022-11-22 14:31:57 -08:00
Jan Petykiewicz	08a9328390	move to hatch-based build	2022-11-22 14:31:43 -08:00
Jan Petykiewicz	f489eb6e06	fix template loading	2021-10-31 19:48:45 -07:00
Jan Petykiewicz	b2043ce841	add comment about pml coords	2021-10-31 19:48:24 -07:00
Jan Petykiewicz	c2e5c94e14	remove unused pcgen.py	2021-10-31 19:48:05 -07:00
Jan Petykiewicz	c75a7e67c9	f-stringify	2021-10-31 19:47:48 -07:00
Jan Petykiewicz	26ae8578c9	update to use gridlock >=1.0	2021-10-31 19:47:25 -07:00
Jan Petykiewicz	4c81f33d05	vec/unvec live in meanas.fdmath	2021-07-11 17:35:25 -07:00
Jan Petykiewicz	1403b9e1a3	vec comes from fdmath	2021-07-11 17:32:14 -07:00
Jan Petykiewicz	e58efd2127	depend on meanas	2021-07-11 17:06:52 -07:00
Jan Petykiewicz	a6384429ee	use new email	2021-07-11 17:06:43 -07:00
Jan Petykiewicz	292fde05d5	convert docstring to cleaner style	2021-07-11 17:06:32 -07:00
Jan Petykiewicz	bbd33844b3	add return type annotation	2021-07-11 17:05:55 -07:00
Jan Petykiewicz	2b96e5dd84	simplify print	2021-07-11 17:05:42 -07:00
Jan Petykiewicz	922161c0e6	improve type annotation	2021-07-11 17:05:17 -07:00
Jan Petykiewicz	da3216133a	clarify that just a plain sum is used (rather than mean)	2021-07-11 17:05:02 -07:00
Jan Petykiewicz	45e047c50e	Move to VERSION.py approach for version	2021-07-11 17:04:18 -07:00
Jan Petykiewicz	8afe1d1f26	style fixes per flake8	2020-10-16 19:32:53 -07:00
Jan Petykiewicz	8cbb0e9864	call update_s during loop	2020-10-16 19:32:35 -07:00
Jan Petykiewicz	4e0bd8b3c6	switch to meanas	2020-10-16 19:32:22 -07:00
Jan Petykiewicz	a4dd031666	ongoing	2019-08-30 22:13:26 -07:00
Jan Petykiewicz	0c7b2fd3a2	add test_simulation	2019-07-30 00:33:35 -07:00
Jan Petykiewicz	23d65d6ebb	Add poynting halves maybe will remove this later... dunno if it's actually useful	2019-07-30 00:33:07 -07:00
Jan Petykiewicz	cf0db63a3f	Revert "Add restrict keyword to pointers (not sharing the same memory for multiple fields)" This reverts commit 60b70bb332d29fb5a2aecb801517be659cf4daf9. It appears to have minimal performance impact, and fails to compile on nvidia cards.	2019-07-17 00:53:55 -07:00
Jan Petykiewicz	89d99187b6	Remove unused code	2019-07-17 00:52:31 -07:00
Jan Petykiewicz	1627d0da05	Consolidate poynting code into update_h	2019-07-15 00:11:09 -07:00