diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..0042015
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,29 @@
+[flake8]
+ignore =
+    # E501 line too long
+    E501,
+    # W391 newlines at EOF
+    W391,
+    # E241 multiple spaces after comma
+    E241,
+    # E302 expected 2 newlines
+    E302,
+    # W503 line break before binary operator (to be deprecated)
+    W503,
+    # E265 block comment should start with '# '
+    E265,
+    # E123 closing bracket does not match indentation of opening bracket's line
+    E123,
+    # E124 closing bracket does not match visual indentation
+    E124,
+    # E221 multiple spaces before operator
+    E221,
+    # E201 whitespace after '['
+    E201,
+    # E741 ambiguous variable name 'I'
+    E741,
+
+
+per-file-ignores =
+    # F401 import without use
+    */__init__.py: F401,
diff --git a/.gitignore b/.gitignore
index 5298f42..9ad27c2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,9 +1,11 @@
 .idea/
 *.h5
 
-__pycache__
+__pycache__/
 *.py[cod]
 build/
 dist/
 *.egg-info/
 
+.pytest_cache/
+.mypy_cache/
diff --git a/fdtd.py b/fdtd.py
index 99af07b..e311c0b 100644
--- a/fdtd.py
+++ b/fdtd.py
@@ -7,6 +7,7 @@ See main() for simulation setup.
 import sys
 import time
 import logging
+import pyopencl
 
 import numpy
 import lzma
@@ -16,7 +17,7 @@ from opencl_fdtd import Simulation
 from masque import Pattern, shapes
 import gridlock
 import pcgen
-import fdfd_tools
+import meanas
 
 
 __author__ = 'Jan Petykiewicz'
@@ -29,60 +30,69 @@ def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern:
     """
     Generate a masque.Pattern object containing a perturbed L3 cavity.
 
-    :param a: Lattice constant.
-    :param radius: Hole radius, in units of a (lattice constant).
-    :param kwargs: Keyword arguments:
-        hole_dose, trench_dose, hole_layer, trench_layer: Shape properties for Pattern.
-                Defaults *_dose=1, hole_layer=0, trench_layer=1.
-        shifts_a, shifts_r: passed to pcgen.l3_shift; specifies lattice constant (1 -
-                multiplicative factor) and radius (multiplicative factor) for shifting
-                holes adjacent to the defect (same row). Defaults are 0.15 shift for
-                first hole, 0.075 shift for third hole, and no radius change.
+    Args:
+        a: Lattice constant.
+        radius: Hole radius, in units of a (lattice constant).
+        hole_dose: Dose for all holes. Default 1.
+        trench_dose: Dose for undercut trenches. Default 1.
+        hole_layer: Layer for holes. Default (0, 0).
+        trench_layer: Layer for undercut trenches. Default (1, 0).
+        shifts_a: passed to pcgen.l3_shift
+        shifts_r: passed to pcgen.l3_shift
         xy_size: [x, y] number of mirror periods in each direction; total size is
-                2 * n + 1 holes in each direction. Default [10, 10].
+                2 * n + 1 holes in each direction. Default (10, 10).
         perturbed_radius: radius of holes perturbed to form an upwards-driected beam
                 (multiplicative factor). Default 1.1.
         trench width: Width of the undercut trenches. Default 1.2e3.
-    :return: masque.Pattern object containing the L3 design
+
+    Returns:
+        `masque.Pattern` object containing the L3 design
     """
 
-    default_args = {'hole_dose':    1,
-                    'trench_dose':  1,
-                    'hole_layer':   0,
-                    'trench_layer': 1,
-                    'shifts_a':     (0.15, 0, 0.075),
-                    'shifts_r':     (1.0, 1.0, 1.0),
-                    'xy_size':      (10, 10),
-                    'perturbed_radius': 1.1,
-                    'trench_width': 1.2e3,
-                    }
+    default_args = {
+        'hole_dose':    1,
+        'trench_dose':  1,
+        'hole_layer':   0,
+        'trench_layer': 1,
+        'shifts_a':     (0.15, 0, 0.075),
+        'shifts_r':     (1.0, 1.0, 1.0),
+        'xy_size':      (10, 10),
+        'perturbed_radius': 1.1,
+        'trench_width': 1.2e3,
+        }
     kwargs = {**default_args, **kwargs}
 
-    xyr = pcgen.l3_shift_perturbed_defect(mirror_dims=kwargs['xy_size'],
-                                          perturbed_radius=kwargs['perturbed_radius'],
-                                          shifts_a=kwargs['shifts_a'],
-                                          shifts_r=kwargs['shifts_r'])
+    xyr = pcgen.l3_shift_perturbed_defect(
+        mirror_dims=kwargs['xy_size'],
+        perturbed_radius=kwargs['perturbed_radius'],
+        shifts_a=kwargs['shifts_a'],
+        shifts_r=kwargs['shifts_r'],
+        )
     xyr *= a
     xyr[:, 2] *= radius
 
     pat = Pattern()
-    pat.name = 'L3p-a{:g}r{:g}rp{:g}'.format(a, radius, kwargs['perturbed_radius'])
+    pat.name = f'L3p-a{a:g}r{radius:g}rp{kwargs["perturbed_radius"]:g}'
     pat.shapes += [shapes.Circle(radius=r, offset=(x, y),
                                  dose=kwargs['hole_dose'],
                                  layer=kwargs['hole_layer'])
                    for x, y, r in xyr]
 
     maxes = numpy.max(numpy.fabs(xyr), axis=0)
-    pat.shapes += [shapes.Polygon.rectangle(
-        lx=(2 * maxes[0]), ly=kwargs['trench_width'],
-        offset=(0, s * (maxes[1] + a + kwargs['trench_width'] / 2)),
-        dose=kwargs['trench_dose'], layer=kwargs['trench_layer'])
-                   for s in (-1, 1)]
+    pat.shapes += [
+        shapes.Polygon.rectangle(
+            lx=(2 * maxes[0]),
+            ly=kwargs['trench_width'],
+            offset=(0, s * (maxes[1] + a + kwargs['trench_width'] / 2)),
+            dose=kwargs['trench_dose'],
+            layer=kwargs['trench_layer'],
+            )
+        for s in (-1, 1)]
     return pat
 
 
 def main():
-    max_t = 8000            # number of timesteps
+    max_t = 4000            # number of timesteps
 
     dx = 25                 # discretization (nm/cell)
     pml_thickness = 8       # (number of cells)
@@ -101,36 +111,43 @@ def main():
     n_air = 1.0   # air
 
     # Half-dimensions of the simulation grid
-    xy_max = (xy_size + 1) * a * [1, numpy.sqrt(3)/2]
-    z_max = 1.6 * a
-    xyz_max = numpy.hstack((xy_max, z_max)) + pml_thickness * dx
-
-    # Coordinates of the edges of the cells.
-    #  The fdtd package can only do square grids at the moment.
-    half_edge_coords = [numpy.arange(dx/2, m + dx, step=dx) for m in xyz_max]
-    edge_coords = [numpy.hstack((-h[::-1], h)) for h in half_edge_coords]
+#    xy_max = (xy_size + 1) * a * [1, numpy.sqrt(3)/2]
+#    z_max = 1.6 * a
+#    xyz_max = numpy.hstack((xy_max, z_max)) + pml_thickness * dx
+#
+#    # Coordinates of the edges of the cells.
+#    #  The fdtd package can only do square grids at the moment.
+#    half_edge_coords = [numpy.arange(dx/2, m + dx, step=dx) for m in xyz_max]
+#    edge_coords = [numpy.hstack((-h[::-1], h)) for h in half_edge_coords]
+    edge_coords = [numpy.arange(-100.5, 101), numpy.arange(-1, 1), numpy.arange(-100.5, 101)]
+#    edge_coords = [numpy.arange(-100.5, 101), numpy.arange(-100.5, 101), numpy.arange(-1, 1)]
 
     # #### Create the grid, mask, and draw the device ####
-    grid = gridlock.Grid(edge_coords, initial=n_air**2, num_grids=3)
-    grid.draw_slab(surface_normal=gridlock.Direction.z,
-                   center=[0, 0, 0],
-                   thickness=th,
-                   eps=n_slab**2)
-    mask = perturbed_l3(a, r)
+    grid = gridlock.Grid(edge_coords)
+    epsilon = grid.allocate(n_air**2)
+#    grid.draw_slab(epsilon,
+#                   surface_normal=2,
+#                   center=[0, 0, 0],
+#                   thickness=th,
+#                   eps=n_slab**2)
+#    mask = perturbed_l3(a, r)
+#
+#    grid.draw_polygons(epsilon,
+#                       surface_normal=2,
+#                       center=[0, 0, 0],
+#                       thickness=2 * th,
+#                       eps=n_air**2,
+#                       polygons=mask.as_polygons())
 
-    grid.draw_polygons(surface_normal=gridlock.Direction.z,
-                       center=[0, 0, 0],
-                       thickness=2 * th,
-                       eps=n_air**2,
-                       polygons=mask.as_polygons())
-
-    logger.info('grid shape: {}'.format(grid.shape))
+    logger.info(f'grid shape: {grid.shape}')
     # #### Create the simulation grid ####
+#    pmls = [{'axis': a, 'polarity': p, 'thickness': pml_thickness}
+#            for a in 'xyz' for p in 'np']
     pmls = [{'axis': a, 'polarity': p, 'thickness': pml_thickness}
-            for a in 'xyz' for p in 'np']
+            for a in 'xz' for p in 'np']
     #bloch = [{'axis': a, 'real': 1, 'imag': 0} for a in 'x']
     bloch = []
-    sim = Simulation(grid.grids, do_poynting=True, pmls=pmls, bloch_boundaries=bloch)
+    sim = Simulation(epsilon, do_poynting=True, pmls=pmls, bloch_boundaries=bloch)
 
     # Source parameters and function
     w = 2 * numpy.pi * dx / wl
@@ -155,36 +172,88 @@ def main():
             f.write('\n=====================G=====================\n')
             f.write(sim.sources['G'])
 
+
+    planes = numpy.empty((max_t, 4))
+    planes2 = numpy.empty((max_t, 4))
+    Ectr = numpy.empty(max_t)
+    u = numpy.empty(max_t)
+    ui = numpy.empty(max_t)
     # #### Run a bunch of iterations ####
     # event = sim.whatever([prev_event]) indicates that sim.whatever should be queued
     #  immediately and run once prev_event is finished.
     start = time.perf_counter()
     for t in range(max_t):
         e = sim.update_E([])
-        if bloch:
-            e = sim.update_F([e])
+#        if bloch:
+#            e = sim.update_F([e])
         e.wait()
 
         ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + numpy.prod(grid.shape)
-        sim.E[ind] += field_source(t)
+#        sim.E[ind] += field_source(t)
+        if t == 2:
+            sim.E[ind] += 1e6
+
+        h_old = sim.H.copy()
+
         e = sim.update_H([])
-        if bloch:
-            e = sim.update_G([e])
-        if sim.update_S:
-            e = sim.update_S([e])
+#        if bloch:
+#            e = sim.update_G([e])
         e.wait()
 
+        S = sim.S.get().reshape(epsilon.shape) * sim.dt * dx * dx *dx
+        m = 30
+        planes[t] = (
+            S[0][+pml_thickness+2, :, pml_thickness+3:-pml_thickness-3].sum(),
+            S[0][-pml_thickness-2, :, pml_thickness+3:-pml_thickness-3].sum(),
+            S[2][pml_thickness+2:-pml_thickness-2, :, +pml_thickness+2].sum(),
+            S[2][pml_thickness+2:-pml_thickness-2, :, -pml_thickness-2].sum(),
+            )
+        planes2[t] = (
+            S[0][grid.shape[0]//2-1, 0, grid.shape[2]//2].sum(),
+            S[0][grid.shape[0]//2  , 0, grid.shape[2]//2].sum(),
+            S[2][grid.shape[0]//2  , 0, grid.shape[2]//2-1].sum(),
+            S[2][grid.shape[0]//2  , 0, grid.shape[2]//2].sum(),
+            )
+
+#        planes[t] = (
+#            S[0][+pml_thickness+m, pml_thickness+m+1:-pml_thickness-m, :].sum(),
+#            S[0][-pml_thickness-m, pml_thickness+m+1:-pml_thickness-m, :].sum(),
+#            S[1][pml_thickness+1+m:-pml_thickness-m, +pml_thickness+m, :].sum(),
+#            S[1][pml_thickness+1+m:-pml_thickness-m, -pml_thickness-m, :].sum(),
+#            )
+#        planes2[t] = (
+#            S[0][grid.shape[0]//2-1, grid.shape[1]//2  , 0].sum(),
+#            S[0][grid.shape[0]//2  , grid.shape[1]//2  , 0].sum(),
+#            S[1][grid.shape[0]//2  , grid.shape[1]//2-1, 0].sum(),
+#            S[1][grid.shape[0]//2  , grid.shape[1]//2  , 0].sum(),
+#            )
+        Ectr[t] = sim.E[ind].get()
+        u[t] = pyopencl.array.sum(sim.E * sim.E * sim.eps + h_old * sim.H).get() * dx * dx * dx
+        ui[t] = (sim.E * sim.E * sim.eps + h_old * sim.H).reshape(epsilon.shape).get()[:, pml_thickness+m:-pml_thickness-m, :,
+                                                                                          pml_thickness+m:-pml_thickness-m].sum() * dx * dx * dx
+#        ui[t] = (sim.E * sim.E * sim.eps + h_old * sim.H).reshape(epsilon.shape).get()[:, pml_thickness+m:-pml_thickness-m,
+#                                                                                          pml_thickness+m:-pml_thickness-m, :].sum() * dx * dx * dx
+
         if t % 100 == 0:
-            logger.info('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start)))
+            avg = (t + 1) / (time.perf_counter() - start)
+            logger.info(f'iteration {t}: average {avg} iterations per sec')
             sys.stdout.flush()
 
     with lzma.open('saved_simulation', 'wb') as f:
         def unvec(f):
-            return fdfd_tools.unvec(f, grid.shape)
+            return meanas.fdmath.unvec(f, grid.shape)
         d = {
             'grid': grid,
+            'epsilon': epsilon,
             'E': unvec(sim.E.get()),
             'H': unvec(sim.H.get()),
+            'dt': sim.dt,
+            'dx': dx,
+            'planes': planes,
+            'planes2': planes2,
+            'Ectr': Ectr,
+            'u': u,
+            'ui': ui,
             }
         if sim.S is not None:
             d['S'] = unvec(sim.S.get())
diff --git a/opencl_fdtd/LICENSE.md b/opencl_fdtd/LICENSE.md
new file mode 120000
index 0000000..7eabdb1
--- /dev/null
+++ b/opencl_fdtd/LICENSE.md
@@ -0,0 +1 @@
+../LICENSE.md
\ No newline at end of file
diff --git a/opencl_fdtd/README.md b/opencl_fdtd/README.md
new file mode 120000
index 0000000..32d46ee
--- /dev/null
+++ b/opencl_fdtd/README.md
@@ -0,0 +1 @@
+../README.md
\ No newline at end of file
diff --git a/opencl_fdtd/__init__.py b/opencl_fdtd/__init__.py
index b621c19..5bb721e 100644
--- a/opencl_fdtd/__init__.py
+++ b/opencl_fdtd/__init__.py
@@ -1,5 +1,8 @@
-from .simulation import Simulation, type_to_C
+from .simulation import (
+    Simulation as Simulation,
+    type_to_C as type_to_C,
+    )
 
 __author__ = 'Jan Petykiewicz'
-
-version = '0.4'
+__version__ = '0.4'
+version = __version__
diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl
index 9127181..28cfa80 100644
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@@ -92,7 +92,7 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
     const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
     dH{{v ~ r}} *= p{{r}}2e{{p}}[ir];
     dH{{u ~ r}} *= p{{r}}2e{{p}}[ir];
-    {{psi ~ u}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1e{{p}}[ir] * dH{{v ~ r}};
+    {{psi ~ u}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1e{{p}}[ir] * dH{{v ~ r}}; //note u uses v
     {{psi ~ v}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1e{{p}}[ir] * dH{{u ~ r}};
     pE{{u}}i {{se}}= {{psi ~ u}}[ip];
     pE{{v}}i {{sh}}= {{psi ~ v}}[ip];
diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index 187a18a..8fd6a5a 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -6,13 +6,14 @@
  *   common_header: Rendered contents of common.cl
  *   pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
  *      axes, polarities, and thicknesses.
- *   do_poynting: Whether to precalculate poynting vector components (boolean)
+ *   do_poynting: Whether to calculate poynting vector (boolean)
+ *   do_poynting_halves: Whether to calculate half-step poynting vectors (boolean)
  *   uniform_dx: If grid is uniform, uniform_dx should be the grid spacing.
  *      Otherwise, uniform_dx should be False and [inv_de{xyz}] arrays must be supplied as
  *      OpenCL args.
  *
  *  OpenCL args:
- *   E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS]
+ *   E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [S], [S0, S1]
  */
 
 {{common_header}}
@@ -52,24 +53,7 @@ if ({{r}} == s{{r}} - 1) {
     dE{{u ~ r}} = bloch_re * dE{{u ~ r}} + bloch_im * (F{{u}}[i + p{{u}}] - F{{u}}[i]);
     dE{{v ~ r}} = bloch_re * dE{{v ~ r}} + bloch_im * (F{{v}}[i + p{{v}}] - F{{v}}[i]);
 }
-{% endfor -%}
-
-{%- if do_poynting %}
-
-
-/*
- *  Precalculate averaged E
- */
-ftype aExy = Ex[i + py] + Ex[i];
-ftype aExz = Ex[i + pz] + Ex[i];
-
-ftype aEyx = Ey[i + px] + Ey[i];
-ftype aEyz = Ey[i + pz] + Ey[i];
-
-ftype aEzx = Ez[i + px] + Ez[i];
-ftype aEzy = Ez[i + py] + Ez[i];
-{%- endif %}
-
+{%- endfor %}
 
 
 
@@ -118,7 +102,7 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
 /*
  *  Update H
  */
-{% if do_poynting -%}
+{% if do_poynting or do_poynting_halves -%}
 // Save old H for averaging
 ftype Hx_old = Hx[i];
 ftype Hy_old = Hy[i];
@@ -131,25 +115,40 @@ Hy[i] -= dt * (dExz - dEzx - pHyi);
 Hz[i] -= dt * (dEyx - dExy - pHzi);
 
 {% if do_poynting -%}
-// Average H across timesteps
-ftype aHxt = Hx[i] + Hx_old;
-ftype aHyt = Hy[i] + Hy_old;
-ftype aHzt = Hz[i] + Hz_old;
-
 /*
- *  Calculate unscaled S components at H locations
+ *  Calculate unscaled S components at ??? locations
+ *  //TODO: document S locations and types
  */
-__global ftype *oSxy = oS + 0 * field_size;
-__global ftype *oSyz = oS + 1 * field_size;
-__global ftype *oSzx = oS + 2 * field_size;
-__global ftype *oSxz = oS + 3 * field_size;
-__global ftype *oSyx = oS + 4 * field_size;
-__global ftype *oSzy = oS + 5 * field_size;
+__global ftype *Sx = S + XX;
+__global ftype *Sy = S + YY;
+__global ftype *Sz = S + ZZ;
 
-oSxy[i] = aEyx * aHzt;
-oSxz[i] = -aEzx * aHyt;
-oSyz[i] = aEzy * aHxt;
-oSyx[i] = -aExy * aHzt;
-oSzx[i] = aExz * aHyt;
-oSzy[i] = -aEyz * aHxt;
+// Sum old and new H
+ftype sHxt = Hx[i] + Hx_old;
+ftype sHyt = Hy[i] + Hy_old;
+ftype sHzt = Hz[i] + Hz_old;
+
+Sx[i] = Ey[i + px] * sHzt - Ez[i + px] * sHyt;
+Sy[i] = Ez[i + py] * sHxt - Ex[i + py] * sHzt;
+Sz[i] = Ex[i + pz] * sHyt - Ey[i + pz] * sHxt;
+{%- endif -%}
+
+{% if do_poynting_halves -%}
+/*
+ *  Calculate unscaled S components at ??? locations
+ *  //TODO: document S locations and types
+ */
+__global ftype *Sx0 = S0 + XX;
+__global ftype *Sy0 = S0 + YY;
+__global ftype *Sz0 = S0 + ZZ;
+__global ftype *Sx1 = S1 + XX;
+__global ftype *Sy1 = S1 + YY;
+__global ftype *Sz1 = S1 + ZZ;
+
+Sx0[i] = Ey[i + px] * Hz_old - Ez[i + px] * Hy_old;
+Sy0[i] = Ez[i + py] * Hx_old - Ex[i + py] * Hz_old;
+Sz0[i] = Ex[i + pz] * Hy_old - Ey[i + pz] * Hx_old;
+Sx1[i] = Ey[i + px] * Hz[i] - Ez[i + px] * Hy[i];
+Sy1[i] = Ez[i + py] * Hx[i] - Ex[i + py] * Hz[i];
+Sz1[i] = Ex[i + pz] * Hy[i] - Ey[i + pz] * Hx[i];
 {%- endif -%}
diff --git a/opencl_fdtd/kernels/update_s.cl b/opencl_fdtd/kernels/update_s.cl
deleted file mode 100644
index 94e061e..0000000
--- a/opencl_fdtd/kernels/update_s.cl
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- *  Update E-field, including any PMLs.
- *
- *  Template parameters:
- *   common_header: Rendered contents of common.cl
- *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
- *   pml_thickness: Number of cells (integer)
- *
- *  OpenCL args:
- *   E, H, dt, S, oS
- */
-
-{{common_header}}
-
-//////////////////////////////////////////////////////////////////////
-
-
-/*
- * Calculate S from oS (pre-calculated components)
- */
-__global ftype *Sx = S + XX;
-__global ftype *Sy = S + YY;
-__global ftype *Sz = S + ZZ;
-
-// Use unscaled S components from H locations
-__global ftype *oSxy = oS + 0 * field_size;
-__global ftype *oSyz = oS + 1 * field_size;
-__global ftype *oSzx = oS + 2 * field_size;
-__global ftype *oSxz = oS + 3 * field_size;
-__global ftype *oSyx = oS + 4 * field_size;
-__global ftype *oSzy = oS + 5 * field_size;
-
-ftype s_factor = dt * 0.125;
-Sx[i] = (oSxy[i] + oSxz[i] + oSxy[i + my] + oSxz[i + mz]) * s_factor;
-Sy[i] = (oSyz[i] + oSyx[i] + oSyz[i + mz] + oSyx[i + mx]) * s_factor;
-Sz[i] = (oSzx[i] + oSzy[i] + oSzx[i + mx] + oSzy[i + my]) * s_factor;
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 03b5588..2c337ec 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -2,28 +2,35 @@
 Class for constructing and holding the basic FDTD operations and fields
 """
 
-from typing import List, Dict, Callable
-from collections import OrderedDict
+from collections.abc import Callable, Sequence
 import numpy
+from numpy.typing import NDArray
+from numpy import floating, complexfloating
 import jinja2
 import warnings
+import logging
 
 import pyopencl
 import pyopencl.array
 from pyopencl.elementwise import ElementwiseKernel
 
-from fdfd_tools import vec
+from meanas.fdmath import vec
 
 
-__author__ = 'Jan Petykiewicz'
+logger = logging.getLogger(__name__)
 
 
 # Create jinja2 env on module load
-jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
+jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__.split('.')[0], 'kernels'))
 
 
-class Simulation(object):
-    """
+class FDTDError(Exception):
+    """ Custom exception for opencl_fdtd """
+    pass
+
+
+class Simulation:
+    r"""
     Constructs and holds the basic FDTD operations and related fields
 
     After constructing this object, call the (update_E, update_H, update_S) members
@@ -32,7 +39,7 @@ class Simulation(object):
         pmls = [{'axis': a, 'polarity': p} for a in 'xyz' for p in 'np']
         sim = Simulation(grid.grids, do_poynting=True, pmls=pmls)
         with open('sources.c', 'w') as f:
-            f.write('{}'.format(sim.sources))
+            f.write(f'{sim.sources}')
 
         for t in range(max_t):
             sim.update_E([]).wait()
@@ -48,7 +55,7 @@ class Simulation(object):
             event.wait()
 
             with lzma.open('saved_simulation', 'wb') as f:
-                dill.dump(fdfd_tools.unvec(sim.E.get(), grid.shape), f)
+                dill.dump(meanas.fdmath.unvec(sim.E.get(), grid.shape), f)
 
     Code in the form
         event2 = sim.update_H([event0, event1])
@@ -56,70 +63,70 @@ class Simulation(object):
      event0 and event1 to occur (i.e. previous operations to finish) before starting execution.
      event2 can then be used to prepare further operations to be run after update_H.
     """
-    E = None            # type: pyopencl.array.Array
-    H = None            # type: pyopencl.array.Array
-    S = None            # type: pyopencl.array.Array
-    eps = None          # type: pyopencl.array.Array
-    dt = None           # type: float
-    inv_dxes = None     # type: List[pyopencl.array.Array]
+    E: pyopencl.array.Array
+    H: pyopencl.array.Array
+    S: pyopencl.array.Array
+    eps: pyopencl.array.Array
+    dt: float
+    inv_dxes: list[pyopencl.array.Array]
 
-    arg_type = None     # type: numpy.float32 or numpy.float64
+    arg_type: type
 
-    context = None      # type: pyopencl.Context
-    queue = None        # type: pyopencl.CommandQueue
+    context: pyopencl.Context
+    queue: pyopencl.CommandQueue
 
-    update_E = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
-    update_H = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
-    update_S = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
-    update_J = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
-    sources = None      # type: Dict[str, str]
+    update_E: Callable[[list[pyopencl.Event]], pyopencl.Event]
+    update_H: Callable[[list[pyopencl.Event]], pyopencl.Event]
+    update_S: Callable[[list[pyopencl.Event]], pyopencl.Event]
+    update_J: Callable[[list[pyopencl.Event]], pyopencl.Event]
+    sources: dict[str, str]
 
-    def __init__(self,
-                 epsilon: List[numpy.ndarray],
-                 pmls: List[Dict[str, int or float]],
-                 bloch_boundaries: List[Dict[str, int or float]] = (),
-                 dxes: List[List[numpy.ndarray]] or float = None,
-                 dt: float = None,
-                 initial_fields: Dict[str, List[numpy.ndarray]] = None,
-                 context: pyopencl.Context = None,
-                 queue: pyopencl.CommandQueue = None,
-                 float_type: numpy.float32 or numpy.float64 = numpy.float32,
-                 do_poynting: bool = True,
-                 do_fieldsrc: bool = False):
+    def __init__(
+            self,
+            epsilon: NDArray,
+            pmls: Sequence[dict[str, float]],
+            bloch_boundaries: Sequence[dict[str, float]] = (),
+            dxes: list[list[NDArray]] | float | None = None,
+            dt: float | None = None,
+            initial_fields: dict[str, NDArray] | None = None,
+            context: pyopencl.Context | None = None,
+            queue: pyopencl.CommandQueue | None = None,
+            float_type: type = numpy.float32,
+            do_poynting: bool = True,
+            do_fieldsrc: bool = False,
+            ) -> None:
         """
         Initialize the simulation.
 
-        :param epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray
+        Args:
+            epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray
                 spanning the simulation domain. Relative epsilon is used.
-        :param pmls: List of dicts with keys:
-            'axis': One of 'x', 'y', 'z'.
-            'direction': One of 'n', 'p'.
-            'thickness': Number of layers, default 8.
-            'epsilon_eff': Effective epsilon to match to. Default 1.0.
-            'mu_eff': Effective mu to match to. Default 1.0.
-            'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6.
-            'm': Polynomial grading exponent. Default 3.5.
-            'ma': Exponent for alpha. Default 1.
-        :param bloch_boundaries: List of dicts with keys:
-            'axis': One of 'x', 'y', 'z'.
-            'real': Real part of bloch phase factor (i.e. real(exp(i * phase)))
-            'imag': Imaginary part of bloch phase factor (i.e. imag(exp(i * phase)))
-        :param dt: Time step. Default is min(dxes) * .99/sqrt(3).
-        :param initial_fields: Dict with optional keys ('E', 'H', 'F', 'G') containing initial values for the
-            specified fields (default is 0 everywhere). Fields have same format as epsilon.
-        :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
-        :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
-        :param float_type: numpy.float32 or numpy.float64. Default numpy.float32.
-        :param do_poynting: If true, enables calculation of the poynting vector, S.
+            pmls: List of dicts with keys:
+                'axis': One of 'x', 'y', 'z'.
+                'direction': One of 'n', 'p'.
+                'thickness': Number of layers, default 8.
+                'epsilon_eff': Effective epsilon to match to. Default 1.0.
+                'mu_eff': Effective mu to match to. Default 1.0.
+                'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6.
+                'm': Polynomial grading exponent. Default 3.5.
+                'ma': Exponent for alpha. Default 1.
+            bloch_boundaries: List of dicts with keys:
+                'axis': One of 'x', 'y', 'z'.
+                'real': Real part of bloch phase factor (i.e. real(exp(i * phase)))
+                'imag': Imaginary part of bloch phase factor (i.e. imag(exp(i * phase)))
+            dt: Time step. Default is min(dxes) * .99/sqrt(3).
+            initial_fields: Dict with optional keys ('E', 'H', 'F', 'G') containing initial values for the
+                specified fields (default is 0 everywhere). Fields have same format as epsilon.
+            context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
+            queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
+            float_type: numpy.float32 or numpy.float64. Default numpy.float32.
+            do_poynting: If True, enables calculation of the poynting vector, S.
                 Poynting vector calculation adds the following computational burdens:
-                    * During update_H, ~6 extra additions/cell are performed in order to spatially
-                        average E and temporally average H. These quantities are multiplied
-                        (6 multiplications/cell) and then stored (6 writes/cell, cache-friendly).
-                    * update_S performs a discrete cross product using the precalculated products
-                        from update_H. This is not nice to the cache and similar to e.g. update_E
-                        in complexity.
-                    * GPU memory requirements are approximately doubled, since S and the intermediate
-                        products must be stored.
+                * During update_H, 12 extra additions/cell are performed in order to temporally
+                    sum E and H. The results are then multiplied by E (6 multiplications/cell) and
+                    then stored (6 writes/cell, cache-friendly). The E-field components are
+                    reused from the H-field update and do not require additional H
+                * GPU memory requirements increase by 50% (for storing S)
         """
         if initial_fields is None:
             initial_fields = {}
@@ -133,22 +140,26 @@ class Simulation(object):
         if dxes is None:
             dxes = 1.0
 
-        if isinstance(dxes, (float, int)):
+        if isinstance(dxes, float | int):
             uniform_dx = dxes
             min_dx = dxes
+        elif all((dxes[0][0][0] == dx).all() for dx in dxes[0] + dxes[1]):
+            uniform_dx = dxes[0][0][0]
+            min_dx = dxes[0][0][0]
         else:
             uniform_dx = False
-            self.inv_dxes = [self._create_field(1 / dxn) for dxn in dxes[0] + dxes[1]]
+            self.inv_dxes = self._create_inv_dxes(dxes)
             min_dx = min(min(dxn) for dxn in dxes[0] + dxes[1])
 
         max_dt = min_dx * .99 / numpy.sqrt(3)
+        logger.info(f'{min_dx=}, {max_dt=}, {dt=}')
 
         if dt is None:
             self.dt = max_dt
         elif dt > max_dt:
-            warnings.warn('Warning: unstable dt: {}'.format(dt))
+            warnings.warn(f'Warning: unstable dt: {dt}', stacklevel=2)
         elif dt <= 0:
-            raise Exception('Invalid dt: {}'.format(dt))
+            raise FDTDError(f'Invalid dt: {dt}')
         else:
             self.dt = dt
 
@@ -170,42 +181,46 @@ class Simulation(object):
         ctype = type_to_C(self.arg_type)
 
         def ptr(arg: str) -> str:
-            return ctype + ' * restrict ' + arg
+            return ctype + ' *' + arg
 
-        base_fields = OrderedDict()
-        base_fields[ptr('E')] = self.E
-        base_fields[ptr('H')] = self.H
-        base_fields[ctype + ' dt'] = self.dt
-        if uniform_dx == False:
+        base_fields = {
+            ptr('E'): self.E,
+            ptr('H'): self.H,
+            ctype + ' dt': self.dt,
+            }
+        if uniform_dx is False:
             inv_dx_names = ['inv_d' + eh + r for eh in 'eh' for r in 'xyz']
-            for name, field in zip(inv_dx_names, self.inv_dxes):
+            for name, field in zip(inv_dx_names, self.inv_dxes, strict=True):
                 base_fields[ptr(name)] = field
 
-        eps_field = OrderedDict()
-        eps_field[ptr('eps')] = self.eps
+        eps_field = {ptr('eps'): self.eps}
 
         if bloch_boundaries:
-            base_fields[ptr('F')] = self.F
-            base_fields[ptr('G')] = self.G
+            base_fields |= {
+                ptr('F'): self.F,
+                ptr('G'): self.G,
+                }
 
-            bloch_fields = OrderedDict()
-            bloch_fields[ptr('E')] = self.F
-            bloch_fields[ptr('H')] = self.G
-            bloch_fields[ctype + ' dt'] = self.dt
-            bloch_fields[ptr('F')] = self.E
-            bloch_fields[ptr('G')] = self.H
+            bloch_fields = {
+                ptr('E'): self.F,
+                ptr('H'): self.G,
+                ctype + ' dt': self.dt,
+                ptr('F'): self.E,
+                ptr('G'): self.H,
+                }
 
         common_source = jinja_env.get_template('common.cl').render(
-                ftype=ctype,
-                shape=self.shape,
-                )
+            ftype=ctype,
+            shape=self.shape,
+            )
         jinja_args = {
-                'common_header': common_source,
-                'pmls': pmls,
-                'do_poynting': do_poynting,
-                'bloch': bloch_boundaries,
-                'uniform_dx': uniform_dx,
-                }
+            'common_header': common_source,
+            'pmls': pmls,
+            'do_poynting': do_poynting,
+            'do_poynting_halves': do_poynting_halves,
+            'bloch': bloch_boundaries,
+            'uniform_dx': uniform_dx,
+            }
         E_source = jinja_env.get_template('update_e.cl').render(**jinja_args)
         H_source = jinja_env.get_template('update_h.cl').render(**jinja_args)
         self.sources['E'] = E_source
@@ -214,27 +229,28 @@ class Simulation(object):
         if bloch_boundaries:
             bloch_args = jinja_args.copy()
             bloch_args['do_poynting'] = False
-            bloch_args['bloch'] = [{'axis': b['axis'],
-                                    'real': b['imag'],
-                                    'imag': b['real']}
-                                   for b in bloch_boundaries]
+            bloch_args['bloch'] = [{
+                    'axis': b['axis'],
+                    'real': b['imag'],
+                    'imag': b['real'],
+                    }
+                for b in bloch_boundaries]
             F_source = jinja_env.get_template('update_e.cl').render(**bloch_args)
             G_source = jinja_env.get_template('update_h.cl').render(**bloch_args)
             self.sources['F'] = F_source
             self.sources['G'] = G_source
 
-
-        S_fields = OrderedDict()
+        S_fields = {}
         if do_poynting:
-            S_source = jinja_env.get_template('update_s.cl').render(**jinja_args)
-            self.sources['S'] = S_source
-
-            self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=self.arg_type)
             self.S = pyopencl.array.zeros_like(self.E)
-            S_fields[ptr('oS')] = self.oS
             S_fields[ptr('S')] = self.S
+        if do_poynting_halves:
+            self.S0 = pyopencl.array.zeros_like(self.E)
+            self.S1 = pyopencl.array.zeros_like(self.E)
+            S_fields[ptr('S0')] = self.S0
+            S_fields[ptr('S1')] = self.S1
 
-        J_fields = OrderedDict()
+        J_fields = {}
         if do_fieldsrc:
             J_source = jinja_env.get_template('update_j.cl').render(**jinja_args)
             self.sources['J'] = J_source
@@ -244,40 +260,36 @@ class Simulation(object):
             J_fields[ptr('Jr')] = self.Jr
             J_fields[ptr('Ji')] = self.Ji
 
-
-        '''
-        PML
-        '''
+        #
+        # PML
+        #
         pml_e_fields, pml_h_fields = self._create_pmls(pmls)
         if bloch_boundaries:
             pml_f_fields, pml_g_fields = self._create_pmls(pmls)
 
-        '''
-        Create operations
-        '''
+        #
+        # Create operations
+        #
         self.update_E = self._create_operation(E_source, (base_fields, eps_field, pml_e_fields))
         self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields))
-        if do_poynting:
-            self.update_S = self._create_operation(S_source, (base_fields, S_fields))
         if bloch_boundaries:
             self.update_F = self._create_operation(F_source, (bloch_fields, eps_field, pml_f_fields))
             self.update_G = self._create_operation(G_source, (bloch_fields, pml_g_fields))
         if do_fieldsrc:
-            args = OrderedDict()
-            [args.update(d) for d in (base_fields, J_fields)]
+            args = base_fields | J_fields
             var_args = [ctype + ' ' + v for v in 'cs'] + ['uint ' + r + m for r in 'xyz' for m in ('min', 'max')]
             update = ElementwiseKernel(self.context, operation=J_source,
                                        arguments=', '.join(list(args.keys()) + var_args))
             self.update_J = lambda e, *a: update(*args.values(), *a, wait_for=e)
 
-
-    def _create_pmls(self, pmls):
+    def _create_pmls(self, pmls: Sequence[dict[str, float]]) -> tuple[dict[str, pyopencl.array.Array], ...]:
         ctype = type_to_C(self.arg_type)
+
         def ptr(arg: str) -> str:
             return ctype + ' *' + arg
 
-        pml_e_fields = OrderedDict()
-        pml_h_fields = OrderedDict()
+        pml_e_fields = {}
+        pml_h_fields = {}
         for pml in pmls:
             a = 'xyz'.find(pml['axis'])
 
@@ -285,7 +297,9 @@ class Simulation(object):
             kappa_max = numpy.sqrt(pml['mu_eff'] * pml['epsilon_eff'])
             alpha_max = pml['cfs_alpha']
 
-            def par(x):
+            print(sigma_max, kappa_max, alpha_max, pml['thickness'], self.dt)
+
+            def par(x, pml=pml, sigma_max=sigma_max, kappa_max=kappa_max, alpha_max=alpha_max):     # noqa: ANN001, ANN202
                 scaling = (x / pml['thickness']) ** pml['m']
                 sigma = scaling * sigma_max
                 kappa = 1 + scaling * (kappa_max - 1)
@@ -301,8 +315,13 @@ class Simulation(object):
             elif pml['polarity'] == 'n':
                 xh -= 0.5
 
+            logger.debug(f'{pml=}')
+            logger.debug(f'{xe=}')
+            logger.debug(f'{xh=}')
+            logger.debug(f'{par(xe)=}')
+            logger.debug(f'{par(xh)=}')
             pml_p_names = [['p' + pml['axis'] + i + eh + pml['polarity'] for i in '012'] for eh in 'eh']
-            for name_e, name_h, pe, ph in zip(pml_p_names[0], pml_p_names[1], par(xe), par(xh)):
+            for name_e, name_h, pe, ph in zip(pml_p_names[0], pml_p_names[1], par(xe), par(xh), strict=True):
                 pml_e_fields[ptr(name_e)] = pyopencl.array.to_device(self.queue, pe)
                 pml_h_fields[ptr(name_h)] = pyopencl.array.to_device(self.queue, ph)
 
@@ -313,64 +332,98 @@ class Simulation(object):
             psi_shape = list(self.shape)
             psi_shape[a] = pml['thickness']
 
-            for ne, nh in zip(*psi_names):
+            for ne, nh in zip(*psi_names, strict=True):
                 pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
                 pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
         return pml_e_fields, pml_h_fields
 
-    def _create_operation(self, source, args_fields):
-        args = OrderedDict()
-        [args.update(d) for d in args_fields]
-        update = ElementwiseKernel(self.context, operation=source,
-                                   arguments=', '.join(args.keys()))
+    def _create_operation(self, source: str, args_fields: Sequence[dict[str, pyopencl.array.Array]]) -> Callable[..., pyopencl.Event]:
+        args = {}
+        for d in args_fields:
+            args.update(d)
+        update = ElementwiseKernel(
+            self.context,
+            operation=source,
+            arguments=', '.join(args.keys()),
+            )
         return lambda e: update(*args.values(), wait_for=e)
 
-    def _create_context(self, context: pyopencl.Context = None,
-                        queue: pyopencl.CommandQueue = None):
-        if context is None:
-            self.context = pyopencl.create_some_context()
-        else:
-            self.context = context
+    def _create_context(
+            self,
+            context: pyopencl.Context | None = None,
+            queue: pyopencl.CommandQueue | None = None,
+            ) -> None:
+        self.context = context or pyopencl.create_some_context()
+        self.queue = queue or pyopencl.CommandQueue(self.context)
 
-        if queue is None:
-            self.queue = pyopencl.CommandQueue(self.context)
-        else:
-            self.queue = queue
-
-    def _create_eps(self, epsilon: List[numpy.ndarray]):
+    def _create_eps(self, epsilon: NDArray) -> pyopencl.array.Array:
         if len(epsilon) != 3:
-            raise Exception('Epsilon must be a list with length of 3')
-        if not all((e.shape == epsilon[0].shape for e in epsilon[1:])):
-            raise Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
+            raise FDTDError('Epsilon must be a list with length of 3')
+        if not all(e.shape == epsilon[0].shape for e in epsilon[1:]):
+            raise FDTDError('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
         if not epsilon[0].shape == self.shape:
-            raise Exception('Epsilon shape mismatch. Expected {}, got {}'.format(self.shape, epsilon[0].shape))
+            raise FDTDError(f'Epsilon shape mismatch. Expected {self.shape}, got {epsilon[0].shape}')
         self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(self.arg_type))
 
-    def _create_field(self, initial_value: List[numpy.ndarray] = None):
+    def _create_field(self, initial_value: NDArray | None = None) -> pyopencl.array.Array:
         if initial_value is None:
             return pyopencl.array.zeros_like(self.eps)
-        else:
-            if len(initial_value) != 3:
-                Exception('Initial field value must be a list of length 3')
-            if not all((f.shape == self.shape for f in initial_value)):
-                Exception('Initial field list elements must have same shape as epsilon elements')
-            return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type))
+        if len(initial_value) != 3:
+            raise FDTDError('Initial field value must be a list of length 3')
+        if not all(f.shape == self.shape for f in initial_value):
+            raise FDTDError('Initial field list elements must have same shape as epsilon elements')
+        return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type))
+
+    def _create_inv_dxes(self, dxes: list[list[NDArray]]) -> pyopencl.array.Array:
+        if len(dxes[0]) or len(dxes[1]) != 3:
+            raise FDTDError('dxes must be two lists of 3 1D ndarrays each')
+        if self.shape != tuple(len(dx) for dx in dxes[0]):
+            raise FDTDError('dxes must be compatible with shape of epsilon')
+        if self.shape != tuple(len(dx) for dx in dxes[1]):
+            raise FDTDError('dxes must be compatible with shape of epsilon')
+        self.inv_dxes = [
+            pyopencl.array.to_device(self.queue, (1 / dxn).astype(self.arg_type))
+            for dxn in chain(dxes[0], dxes[1])]
 
 
-def type_to_C(float_type) -> str:
+def type_to_C(
+        float_type: type,
+        ) -> str:
     """
     Returns a string corresponding to the C equivalent of a numpy type.
     Only works for float16, float32, float64.
 
-    :param float_type: e.g. numpy.float32
-    :return: string containing the corresponding C type (eg. 'double')
+    Args:
+        float_type: e.g. numpy.float32
+
+    Returns:
+        string containing the corresponding C type (eg. 'double')
     """
-    if float_type == numpy.float16:
-        arg_type = 'half'
-    elif float_type == numpy.float32:
-        arg_type = 'float'
-    elif float_type == numpy.float64:
-        arg_type = 'double'
-    else:
-        raise Exception('Unsupported type')
-    return arg_type
+    types = {
+        numpy.float16: 'half',
+        numpy.float32: 'float',
+        numpy.float64: 'double',
+        numpy.complex64: 'cfloat_t',
+        numpy.complex128: 'cdouble_t',
+    }
+    if float_type not in types:
+        raise FDTDError(f'Unsupported type: {float_type}')
+
+    return types[float_type]
+
+#            def par(x):
+#                scaling = ((x / (pml['thickness'])) ** pml['m'])
+#                print('scaling', scaling)
+#                print('sigma_max * dt / 2', sigma_max * self.dt / 2)
+#                print('kappa_max', kappa_max)
+#                print('m', pml['m'])
+#                sigma = scaling * sigma_max
+#                kappa = 1 + scaling * (kappa_max - 1)
+#                alpha = ((1 - x / pml['thickness']) ** pml['ma']) * alpha_max
+#                p0 = 1/(1 + self.dt * (alpha + sigma / kappa))
+#                p1 = self.dt * sigma / kappa * p0
+#                p2 = 1/kappa
+#                print(p0.min(), p0.max(), p1.min(), p1.max())
+#                return p0, p1, p2
+#
+#
diff --git a/opencl_fdtd/test_simulation.py b/opencl_fdtd/test_simulation.py
new file mode 100644
index 0000000..9c8649a
--- /dev/null
+++ b/opencl_fdtd/test_simulation.py
@@ -0,0 +1,382 @@
+import unittest
+import numpy
+
+from opencl_fdtd import Simulation
+from meanas import fdtd
+
+
+class BasicTests():
+    def test_initial_fields(self):
+        # Make sure initial fields didn't change
+        e0 = self.es[0]
+        h0 = self.hs[0]
+        mask = self.src_mask
+
+        self.assertEqual(e0[mask], self.j_mag / self.epsilon[mask])
+        self.assertFalse(e0[~mask].any())
+        self.assertFalse(h0.any())
+
+
+    def test_initial_energy(self):
+        e0 = self.es[0]
+        h0 = self.hs[0]
+        h1 = self.hs[1]
+        mask = self.src_mask[1]
+        dxes = self.dxes if self.dxes is not None else tuple(tuple(numpy.ones(s) for s in e0.shape[1:]) for _ in range(2))
+        dV = numpy.prod(numpy.meshgrid(*dxes[0], indexing='ij'), axis=0)
+        u0 = self.j_mag * self.j_mag / self.epsilon[self.src_mask] * dV[mask]
+        args = {
+            'dxes': self.dxes,
+            'epsilon': self.epsilon,
+            }
+
+        # Make sure initial energy and E dot J are correct
+        energy0 = fdtd.energy_estep(h0=h0, e1=e0, h2=self.hs[1], **args)
+        e_dot_j_0 = fdtd.delta_energy_j(j0=(e0 - 0) * self.epsilon, e1=e0, dxes=self.dxes)
+        self.assertTrue(numpy.allclose(energy0[mask], u0))
+        self.assertFalse(energy0[~mask].any(), msg=f'{energy0=}')
+        self.assertTrue(numpy.allclose(e_dot_j_0[mask], u0))
+        self.assertFalse(e_dot_j_0[~mask].any(), msg=f'{e_dot_j_0=}')
+
+
+    def test_energy_conservation(self):
+        e0 = self.es[0]
+        u0 = fdtd.delta_energy_j(j0=(e0 - 0) * self.epsilon, e1=e0, dxes=self.dxes).sum()
+        args = {'dxes': self.dxes,
+                'epsilon': self.epsilon}
+
+        for ii in range(1, 8):
+            with self.subTest(i=ii):
+                u_hstep = fdtd.energy_hstep(e0=self.es[ii-1], h1=self.hs[ii], e2=self.es[ii], **args)
+                u_estep = fdtd.energy_estep(h0=self.hs[ii], e1=self.es[ii], h2=self.hs[ii + 1], **args)
+                self.assertTrue(numpy.allclose(u_hstep.sum(), u0), msg=f'u_hstep: {u_hstep.sum()}\n{numpy.moveaxis(u_hstep, -1, 0)}')
+                self.assertTrue(numpy.allclose(u_estep.sum(), u0), msg=f'u_estep: {u_estep.sum()}\n{numpy.moveaxis(u_estep, -1, 0)}')
+
+
+    def test_poynting(self):
+        for ii in range(1, 3):
+            with self.subTest(i=ii):
+                s = fdtd.poynting(e=self.es[ii], h=self.hs[ii+1] + self.hs[ii])
+                sf = numpy.moveaxis(s, -1, 0)
+                ss = numpy.moveaxis(self.ss[ii], -1, 0)
+                self.assertTrue(numpy.allclose(s, self.ss[ii], rtol=1e-4),
+                                msg=f'From ExH:\n{sf}\nFrom sim.S:\n{ss}')
+
+
+    def test_poynting_divergence(self):
+        args = {
+            'dxes': self.dxes,
+            'epsilon': self.epsilon,
+            }
+
+        dxes = self.dxes if self.dxes is not None else tuple(tuple(numpy.ones(s) for s in self.epsilon.shape[1:]) for _ in range(2))
+        dV = numpy.prod(numpy.meshgrid(*dxes[0], indexing='ij'), axis=0)
+
+        u_eprev = None
+        for ii in range(1, 8):
+            with self.subTest(i=ii):
+                u_hstep = fdtd.energy_hstep(e0=self.es[ii-1], h1=self.hs[ii], e2=self.es[ii], **args)
+                u_estep = fdtd.energy_estep(h0=self.hs[ii], e1=self.es[ii], h2=self.hs[ii + 1], **args)
+
+                du_half_h2e = u_estep - u_hstep
+                div_s_h2e = self.dt * fdtd.poynting_divergence(e=self.es[ii], h=self.hs[ii], dxes=self.dxes) * dV
+
+                du_half_h2e_f = numpy.moveaxis(du_half_h2e, -1, 0)
+                div_s_h2e_f = -numpy.moveaxis(div_s_h2e, -1, 0)
+                self.assertTrue(numpy.allclose(du_half_h2e, -div_s_h2e, rtol=1e-4),
+                                msg=f'du_half_h2e\n{du_half_h2e_f}\ndiv_s_h2e\n{div_s_h2e_f}')
+
+                if u_eprev is None:
+                    u_eprev = u_estep
+                    continue
+
+                # previous half-step
+                du_half_e2h = u_hstep - u_eprev
+                div_s_e2h = self.dt * fdtd.poynting_divergence(e=self.es[ii-1], h=self.hs[ii], dxes=self.dxes) * dV
+                du_half_e2h_f = numpy.moveaxis(du_half_e2h, -1, 0)
+                div_s_e2h_f = -numpy.moveaxis(div_s_e2h, -1, 0)
+                self.assertTrue(numpy.allclose(du_half_e2h, -div_s_e2h, rtol=1e-4),
+                                msg=f'du_half_e2h\n{du_half_e2h_f}\ndiv_s_e2h\n{div_s_e2h_f}')
+                u_eprev = u_estep
+
+
+    def test_poynting_planes(self):
+        args = {
+            'dxes': self.dxes,
+            'epsilon': self.epsilon,
+            }
+        dxes = self.dxes if self.dxes is not None else tuple(tuple(numpy.ones(s) for s in self.epsilon.shape[1:]) for _ in range(2))
+        dV = numpy.prod(numpy.meshgrid(*dxes[0], indexing='ij'), axis=0)
+
+        mx = numpy.roll(self.src_mask, (-1, -1), axis=(0, 1))
+        my = numpy.roll(self.src_mask, -1, axis=2)
+        mz = numpy.roll(self.src_mask, (+1, -1), axis=(0, 3))
+        px = numpy.roll(self.src_mask, -1, axis=0)
+        py = self.src_mask.copy()
+        pz = numpy.roll(self.src_mask, +1, axis=0)
+
+        u_eprev = None
+        for ii in range(1, 8):
+            with self.subTest(i=ii):
+                u_hstep = fdtd.energy_hstep(e0=self.es[ii-1], h1=self.hs[ii], e2=self.es[ii], **args)
+                u_estep = fdtd.energy_estep(h0=self.hs[ii], e1=self.es[ii], h2=self.hs[ii + 1], **args)
+
+                s_h2e = -fdtd.poynting(e=self.es[ii], h=self.hs[ii]) * self.dt
+                s_h2e[0] *= dxes[0][1][None, :, None] * dxes[0][2][None, None, :]
+                s_h2e[1] *= dxes[0][0][:, None, None] * dxes[0][2][None, None, :]
+                s_h2e[2] *= dxes[0][0][:, None, None] * dxes[0][1][None, :, None]
+                planes = [s_h2e[px].sum(), -s_h2e[mx].sum(),
+                          s_h2e[py].sum(), -s_h2e[my].sum(),
+                          s_h2e[pz].sum(), -s_h2e[mz].sum()]
+                du = (u_estep - u_hstep)[self.src_mask[1]]
+                self.assertTrue(numpy.allclose(sum(planes), (u_estep - u_hstep)[self.src_mask[1]]),
+                                msg=f'planes: {planes} (sum: {sum(planes)})\n du:\n {du}')
+
+                if u_eprev is None:
+                    u_eprev = u_estep
+                    continue
+
+                s_e2h = -fdtd.poynting(e=self.es[ii - 1], h=self.hs[ii]) * self.dt
+                s_e2h[0] *= dxes[0][1][None, :, None] * dxes[0][2][None, None, :]
+                s_e2h[1] *= dxes[0][0][:, None, None] * dxes[0][2][None, None, :]
+                s_e2h[2] *= dxes[0][0][:, None, None] * dxes[0][1][None, :, None]
+                planes = [s_e2h[px].sum(), -s_e2h[mx].sum(),
+                          s_e2h[py].sum(), -s_e2h[my].sum(),
+                          s_e2h[pz].sum(), -s_e2h[mz].sum()]
+                du = (u_hstep - u_eprev)[self.src_mask[1]]
+                self.assertTrue(numpy.allclose(sum(planes), (u_hstep - u_eprev)[self.src_mask[1]]),
+                                msg=f'planes: {du} (sum: {sum(planes)})\n du:\n {du}')
+
+                # previous half-step
+                u_eprev = u_estep
+
+
+class Basic2DNoDXOnlyVacuum(unittest.TestCase, BasicTests):
+    def setUp(self):
+        shape = [3, 5, 5, 1]
+        self.dt = 0.5
+        self.epsilon = numpy.ones(shape, dtype=float)
+        self.j_mag = 32
+        self.dxes = None
+
+        self.src_mask = numpy.zeros_like(self.epsilon, dtype=bool)
+        self.src_mask[1, 2, 2, 0] = True
+
+        e = numpy.zeros_like(self.epsilon)
+        h = numpy.zeros_like(self.epsilon)
+        e[self.src_mask] = self.j_mag / self.epsilon[self.src_mask]
+        self.es = [e]
+        self.hs = [h]
+        self.ss = []
+
+        sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
+                         initial_fields={'E': e, 'H': h}, do_poynting=True)
+
+        for _ in range(9):
+            e = e.copy()
+            h = h.copy()
+            sim.update_H([]).wait()
+            sim.update_E([]).wait()
+            self.es.append(sim.E.get().reshape(shape))
+            self.hs.append(sim.H.get().reshape(shape))
+            self.ss.append(sim.S.get().reshape(shape))
+
+
+class Basic2DUniformDX3(unittest.TestCase, BasicTests):
+    def setUp(self):
+        shape = [3, 5, 1, 5]
+        self.dt = 0.5
+        self.j_mag = 32
+        self.dxes = tuple(tuple(numpy.full(s, 2.0) for s in shape[1:]) for _ in range(2))
+
+        self.src_mask = numpy.zeros(shape, dtype=bool)
+        self.src_mask[1, 2, 0, 2] = True
+
+        self.epsilon = numpy.full(shape, 1, dtype=float)
+        self.epsilon[self.src_mask] = 2
+
+        e = numpy.zeros_like(self.epsilon)
+        h = numpy.zeros_like(self.epsilon)
+        e[self.src_mask] = self.j_mag / self.epsilon[self.src_mask]
+        self.es = [e]
+        self.hs = [h]
+        self.ss = []
+
+        sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
+                         initial_fields={'E': e, 'H': h}, do_poynting=True)
+
+        for _ in range(9):
+            e = e.copy()
+            h = h.copy()
+            sim.update_H([]).wait()
+            sim.update_E([]).wait()
+            self.es.append(sim.E.get().reshape(shape))
+            self.hs.append(sim.H.get().reshape(shape))
+            self.ss.append(sim.S.get().reshape(shape))
+
+
+class Basic3DUniformDXOnlyVacuum(unittest.TestCase, BasicTests):
+    def setUp(self):
+        shape = [3, 5, 5, 5]
+        self.dt = 0.5
+        self.epsilon = numpy.ones(shape, dtype=float)
+        self.j_mag = 32
+        self.dxes = tuple(tuple(numpy.ones(s) for s in shape[1:]) for _ in range(2))
+
+        self.src_mask = numpy.zeros_like(self.epsilon, dtype=bool)
+        self.src_mask[1, 2, 2, 2] = True
+
+        e = numpy.zeros_like(self.epsilon)
+        h = numpy.zeros_like(self.epsilon)
+        e[self.src_mask] = self.j_mag / self.epsilon[self.src_mask]
+        self.es = [e]
+        self.hs = [h]
+        self.ss = []
+
+        sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
+                         initial_fields={'E': e, 'H': h}, do_poynting=True)
+
+        for _ in range(9):
+            e = e.copy()
+            h = h.copy()
+            sim.update_H([]).wait()
+            sim.update_E([]).wait()
+            self.es.append(sim.E.get().reshape(shape))
+            self.hs.append(sim.H.get().reshape(shape))
+            self.ss.append(sim.S.get().reshape(shape))
+
+
+class Basic3DUniformDXUniformN(unittest.TestCase, BasicTests):
+    def setUp(self):
+        shape = [3, 5, 5, 5]
+        self.dt = 0.5
+        self.epsilon = numpy.full(shape, 2.5, dtype=float)
+        self.j_mag = 32
+        self.dxes = tuple(tuple(numpy.ones(s) for s in shape[1:]) for _ in range(2))
+
+        self.src_mask = numpy.zeros_like(self.epsilon, dtype=bool)
+        self.src_mask[1, 2, 2, 2] = True
+
+        e = numpy.zeros_like(self.epsilon)
+        h = numpy.zeros_like(self.epsilon)
+        e[self.src_mask] = self.j_mag / self.epsilon[self.src_mask]
+        self.es = [e]
+        self.hs = [h]
+        self.ss = []
+
+        sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
+                         initial_fields={'E': e, 'H': h}, do_poynting=True)
+
+        for _ in range(9):
+            e = e.copy()
+            h = h.copy()
+            sim.update_H([]).wait()
+            sim.update_E([]).wait()
+            self.es.append(sim.E.get().reshape(shape))
+            self.hs.append(sim.H.get().reshape(shape))
+            self.ss.append(sim.S.get().reshape(shape))
+
+
+class Basic3DUniformDX(unittest.TestCase, BasicTests):
+    def setUp(self):
+        shape = [3, 5, 5, 5]
+        self.dt = 0.33
+        self.j_mag = 32
+        self.dxes = tuple(tuple(numpy.ones(s) for s in shape[1:]) for _ in range(2))
+
+        self.src_mask = numpy.zeros(shape, dtype=bool)
+        self.src_mask[1, 2, 2, 2] = True
+
+        self.epsilon = numpy.full(shape, 1, dtype=float)
+        self.epsilon[self.src_mask] = 2
+
+        e = numpy.zeros_like(self.epsilon)
+        h = numpy.zeros_like(self.epsilon)
+        e[self.src_mask] = self.j_mag / self.epsilon[self.src_mask]
+        self.es = [e]
+        self.hs = [h]
+        self.ss = []
+
+        sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
+                         initial_fields={'E': e, 'H': h}, do_poynting=True)
+
+        for _ in range(9):
+            e = e.copy()
+            h = h.copy()
+            sim.update_H([]).wait()
+            sim.update_E([]).wait()
+            self.es.append(sim.E.get().reshape(shape))
+            self.hs.append(sim.H.get().reshape(shape))
+            self.ss.append(sim.S.get().reshape(shape))
+
+
+class JdotE_3DUniformDX(unittest.TestCase):
+    def setUp(self):
+        shape = [3, 5, 5, 5]
+        self.dt = 0.5
+        self.j_mag = 32
+        self.dxes = tuple(tuple(numpy.full(s, 2.0) for s in shape[1:]) for _ in range(2))
+
+        self.src_mask = numpy.zeros(shape, dtype=bool)
+        self.src_mask[1, 2, 2, 2] = True
+
+        self.epsilon = numpy.full(shape, 4, dtype=float)
+        self.epsilon[self.src_mask] = 2
+
+        rng = numpy.random.default_rng()
+        e = rng.integers(-128, 128 + 1, size=shape, dtype=float)
+        h = rng.integers(-128, 128 + 1, size=shape, dtype=float)
+        self.es = [e]
+        self.hs = [h]
+        self.ss = []
+
+        sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
+                         initial_fields={'E': e, 'H': h}, do_poynting=True, float_type=numpy.float64)
+
+        eh2h = fdtd.maxwell_h(dt=self.dt, dxes=self.dxes)
+        eh2e = fdtd.maxwell_e(dt=self.dt, dxes=self.dxes)
+        for ii in range(9):
+            e = e.copy()
+            h = h.copy()
+            sim.update_H([]).wait()
+            sim.update_E([]).wait()
+
+            if ii == 1:
+                nn = numpy.where(self.src_mask.flat)[0][0]
+                self.e_before = sim.E.get().reshape(shape)
+                sim.E[nn] += self.j_mag / self.epsilon[self.src_mask][0]
+                self.e_after = sim.E.get().reshape(shape)
+                self.j_dot_e = self.j_mag * e[self.src_mask]
+
+            self.hs.append(sim.H.get().reshape(shape))
+            self.es.append(sim.E.get().reshape(shape))
+            self.ss.append(sim.S.get().reshape(shape))
+
+
+    def test_j_dot_e(self):
+        h0 = self.hs[2]
+        e0 = self.es[1]
+        e1 = self.es[2]
+        j0 = numpy.zeros_like(e0)
+        j0[self.src_mask] = self.j_mag
+        args = {
+            'dxes': self.dxes,
+            'epsilon': self.epsilon,
+            }
+        e2h = fdtd.maxwell_h(dt=self.dt, dxes=self.dxes)
+
+        #ee = j0 * (2 * e0 - j0)
+        #hh = h0 * e2h(j0, numpy.zeros_like(h0))
+        #u0 = fdtd.dxmul(ee, hh, **args)
+        u0 = fdtd.delta_energy_j(j0=j0, e1=(e0 + e1), dxes=self.dxes)
+
+        #uh = [fdtd.energy_hstep(e0=self.es[ii-1], h1=self.hs[ii], e2=self.es[ii], **args) for ii in range(1, 9)]
+        #ue = [fdtd.energy_estep(h0=self.hs[ii], e1=self.es[ii], h2=self.hs[ii + 1], **args) for ii in range(8)]
+        #uht = [uu.sum() for uu in uh]
+        #uet = [uu.sum() for uu in ue]
+
+        u_hstep = fdtd.energy_hstep(e0=self.es[0], h1=self.hs[1], e2=self.es[1], **args)
+        u_estep = fdtd.energy_estep(h0=self.hs[-2], e1=self.es[-2], h2=self.hs[-1], **args)
+        #breakpoint()
+        du = (u_estep - u_hstep).sum()
+        self.assertTrue(numpy.allclose(u0.sum(), (u_estep - u_hstep).sum()), msg=f'{u0.sum()} != {du}')
diff --git a/pcgen.py b/pcgen.py
deleted file mode 100644
index 9481472..0000000
--- a/pcgen.py
+++ /dev/null
@@ -1,213 +0,0 @@
-"""
-Routines for creating normalized 2D lattices and common photonic crystal
- cavity designs.
-"""
-
-from typing import List
-
-import numpy
-
-
-def triangular_lattice(dims: List[int],
-                       asymmetrical: bool=False
-                       ) -> numpy.ndarray:
-    """
-    Return an ndarray of [[x0, y0], [x1, y1], ...] denoting lattice sites for
-     a triangular lattice in 2D. The lattice will be centered around (0, 0),
-     unless asymmetrical=True in which case there will be extra holes in the +x
-     direction.
-
-    :param dims: Number of lattice sites in the [x, y] directions.
-    :param asymmetrical: If true, each row in x will contain the same number of
-            lattice sites. If false, the structure is symmetrical around (0, 0).
-    :return: [[x0, y0], [x1, 1], ...] denoting lattice sites.
-    """
-    dims = numpy.array(dims, dtype=int)
-
-    if asymmetrical:
-        k = 0
-    else:
-        k = 1
-
-    positions = []
-    ymax = (dims[1] - 1)/2
-    for j in numpy.linspace(-ymax, ymax, dims[0]):
-        j_odd = numpy.floor(j) % 2
-
-        x_offset = j_odd * 0.5
-        y_offset = j * numpy.sqrt(3)/2
-
-        num_x = dims[0] - k * j_odd
-        xmax = (dims[0] - 1)/2
-        xs = numpy.linspace(-xmax, xmax - k * j_odd, num_x) + x_offset
-        ys = numpy.full_like(xs, y_offset)
-
-        positions += [numpy.vstack((xs, ys)).T]
-
-    xy = numpy.vstack(tuple(positions))
-    return xy[xy[:, 0].argsort(), ]
-
-
-def square_lattice(dims: List[int]) -> numpy.ndarray:
-    """
-    Return an ndarray of [[x0, y0], [x1, y1], ...] denoting lattice sites for
-     a square lattice in 2D. The lattice will be centered around (0, 0).
-
-    :param dims: Number of lattice sites in the [x, y] directions.
-    :return: [[x0, y0], [x1, 1], ...] denoting lattice sites.
-    """
-    xs, ys = numpy.meshgrid(range(dims[0]), range(dims[1]), 'xy')
-    xs -= dims[0]/2
-    ys -= dims[1]/2
-    xy = numpy.vstack((xs.flatten(), ys.flatten())).T
-    return xy[xy[:, 0].argsort(), ]
-
-# ### Photonic crystal functions ###
-
-
-def nanobeam_holes(a_defect: float,
-                   num_defect_holes: int,
-                   num_mirror_holes: int
-                   ) -> numpy.ndarray:
-    """
-    Returns a list of [[x0, r0], [x1, r1], ...] of nanobeam hole positions and radii.
-     Creates a region in which the lattice constant and radius are progressively
-     (linearly) altered over num_defect_holes holes until they reach the value
-     specified by a_defect, then symmetrically returned to a lattice constant and
-     radius of 1, which is repeated num_mirror_holes times on each side.
-
-    :param a_defect: Minimum lattice constant for the defect, as a fraction of the
-            mirror lattice constant (ie., for no defect, a_defect = 1).
-    :param num_defect_holes: How many holes form the defect (per-side)
-    :param num_mirror_holes: How many holes form the mirror (per-side)
-    :return: Ndarray [[x0, r0], [x1, r1], ...] of nanobeam hole positions and radii.
-    """
-    a_values = numpy.linspace(a_defect, 1, num_defect_holes, endpoint=False)
-    xs = a_values.cumsum() - (a_values[0] / 2)  # Later mirroring makes center distance 2x as long
-    mirror_xs = numpy.arange(1, num_mirror_holes + 1) + xs[-1]
-    mirror_rs = numpy.ones_like(mirror_xs)
-    return numpy.vstack((numpy.hstack((-mirror_xs[::-1], -xs[::-1], xs, mirror_xs)),
-                         numpy.hstack((mirror_rs[::-1], a_values[::-1], a_values, mirror_rs)))).T
-
-
-def ln_defect(mirror_dims: List[int], defect_length: int) -> numpy.ndarray:
-    """
-    N-hole defect in a triangular lattice.
-
-    :param mirror_dims: [x, y] mirror lengths (number of holes). Total number of holes
-            is 2 * n + 1 in each direction.
-    :param defect_length: Length of defect. Should be an odd number.
-    :return: [[x0, y0], [x1, y1], ...] for all the holes
-    """
-    if defect_length % 2 != 1:
-        raise Exception('defect_length must be odd!')
-    p = triangular_lattice([2 * d + 1 for d in mirror_dims])
-    half_length = numpy.floor(defect_length / 2)
-    hole_nums = numpy.arange(-half_length, half_length + 1)
-    holes_to_keep = numpy.in1d(p[:, 0], hole_nums, invert=True)
-    return p[numpy.logical_or(holes_to_keep, p[:, 1] != 0), ]
-
-
-def ln_shift_defect(mirror_dims: List[int],
-                    defect_length: int,
-                    shifts_a: List[float]=(0.15, 0, 0.075),
-                    shifts_r: List[float]=(1, 1, 1)
-                    ) -> numpy.ndarray:
-    """
-    N-hole defect with shifted holes (intended to give the mode a gaussian profile
-     in real- and k-space so as to improve both Q and confinement). Holes along the
-     defect line are shifted and altered according to the shifts_* parameters.
-
-    :param mirror_dims: [x, y] mirror lengths (number of holes). Total number of holes
-            is 2 * n + 1 in each direction.
-    :param defect_length: Length of defect. Should be an odd number.
-    :param shifts_a: Percentage of a to shift (1st, 2nd, 3rd,...) holes along the defect line
-    :param shifts_r: Factor to multiply the radius by. Should match length of shifts_a
-    :return: [[x0, y0, r0], [x1, y1, r1], ...] for all the holes
-    """
-    if not hasattr(shifts_a, "__len__") and shifts_a is not None:
-        shifts_a = [shifts_a]
-    if not hasattr(shifts_r, "__len__") and shifts_r is not None:
-        shifts_r = [shifts_r]
-
-    xy = ln_defect(mirror_dims, defect_length)
-
-    # Add column for radius
-    xyr = numpy.hstack((xy, numpy.ones((xy.shape[0], 1))))
-
-    # Shift holes
-    # Expand shifts as necessary
-    n_shifted = max(len(shifts_a), len(shifts_r))
-
-    tmp_a = numpy.array(shifts_a)
-    shifts_a = numpy.ones((n_shifted, ))
-    shifts_a[:len(tmp_a)] = tmp_a
-
-    tmp_r = numpy.array(shifts_r)
-    shifts_r = numpy.ones((n_shifted, ))
-    shifts_r[:len(tmp_r)] = tmp_r
-
-    x_removed = numpy.floor(defect_length / 2)
-
-    for ind in range(n_shifted):
-        for sign in (-1, 1):
-            x_val = sign * (x_removed + ind + 1)
-            which = numpy.logical_and(xyr[:, 0] == x_val, xyr[:, 1] == 0)
-            xyr[which, ] = (x_val + numpy.sign(x_val) * shifts_a[ind], 0, shifts_r[ind])
-
-    return xyr
-
-
-def r6_defect(mirror_dims: List[int]) -> numpy.ndarray:
-    """
-    R6 defect in a triangular lattice.
-
-    :param mirror_dims: [x, y] mirror lengths (number of holes). Total number of holes
-            is 2 * n + 1 in each direction.
-    :return: [[x0, y0], [x1, y1], ...] specifying hole centers.
-    """
-    xy = triangular_lattice([2 * d + 1 for d in mirror_dims])
-
-    rem_holes_plus = numpy.array([[1, 0],
-                                  [0.5, +numpy.sqrt(3)/2],
-                                  [0.5, -numpy.sqrt(3)/2]])
-    rem_holes = numpy.vstack((rem_holes_plus, -rem_holes_plus))
-
-    for rem_xy in rem_holes:
-        xy = xy[(xy != rem_xy).any(axis=1), ]
-
-    return xy
-
-
-def l3_shift_perturbed_defect(mirror_dims: List[int],
-                              perturbed_radius: float=1.1,
-                              shifts_a: List[float]=(),
-                              shifts_r: List[float]=()
-                              ) -> numpy.ndarray:
-    """
-    3-hole defect with perturbed hole sizes intended to form an upwards-directed
-     beam. Can also include shifted holes along the defect line, intended
-     to give the mode a more gaussian profile to improve Q.
-
-    :param mirror_dims: [x, y] mirror lengths (number of holes). Total number of holes
-            is 2 * n + 1 in each direction.
-    :param perturbed_radius: Amount to perturb the radius of the holes used for beam-forming
-    :param shifts_a: Percentage of a to shift (1st, 2nd, 3rd,...) holes along the defect line
-    :param shifts_r: Factor to multiply the radius by. Should match length of shifts_a
-    :return: [[x0, y0, r0], [x1, y1, r1], ...] for all the holes
-    """
-    xyr = ln_shift_defect(mirror_dims, 3, shifts_a, shifts_r)
-
-    abs_x, abs_y = (numpy.fabs(xyr[:, i]) for i in (0, 1))
-
-    # Sorted unique xs and ys
-    # Ignore row y=0 because it might have shifted holes
-    xs = numpy.unique(abs_x[abs_x != 0])
-    ys = numpy.unique(abs_y)
-
-    # which holes should be perturbed? (xs[[3, 7]], ys[1]) and (xs[[2, 6]], ys[2])
-    perturbed_holes = ((xs[a], ys[b]) for a, b in ((3, 1), (7, 1), (2, 2), (6, 2)))
-    for row in xyr:
-        if numpy.fabs(row) in perturbed_holes:
-            row[2] = perturbed_radius
-    return xyr
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..e1a534a
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,93 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "opencl_fdtd"
+description = "OpenCL FDTD solver"
+readme = "README.md"
+license = { file = "LICENSE.md" }
+authors = [
+    { name="Jan Petykiewicz", email="jan@mpxd.net" },
+    ]
+homepage = "https://mpxd.net/code/jan/opencl_fdtd"
+repository = "https://mpxd.net/code/jan/opencl_fdtd"
+keywords = [
+    "FDTD",
+    "finite",
+    "difference",
+    "time",
+    "domain",
+    "simulation",
+    "optics",
+    "electromagnetic",
+    "dielectric",
+    ]
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Manufacturing",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: GNU Affero General Public License v3",
+    "Topic :: Scientific/Engineering",
+    ]
+requires-python = ">=3.11"
+dynamic = ["version"]
+dependencies = [
+    "numpy>=1.26",
+    "pyopencl",
+    "jinja2",
+    "meanas>=0.3",
+    ]
+
+[tool.hatch.version]
+path = "opencl_fdtd/__init__.py"
+
+
+[tool.ruff]
+exclude = [
+    ".git",
+    "dist",
+    ]
+line-length = 145
+indent-width = 4
+lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
+lint.select = [
+    "NPY", "E", "F", "W", "B", "ANN", "UP", "SLOT", "SIM", "LOG",
+    "C4", "ISC", "PIE", "PT", "RET", "TCH", "PTH", "INT",
+    "ARG", "PL", "R", "TRY",
+    "G010", "G101", "G201", "G202",
+    "Q002", "Q003", "Q004",
+    ]
+lint.ignore = [
+    #"ANN001",   # No annotation
+    "ANN002",   # *args
+    "ANN003",   # **kwargs
+    "ANN401",   # Any
+    "ANN101",   # self: Self
+    "SIM108",   # single-line if / else assignment
+    "RET504",   # x=y+z; return x
+    "PIE790",   # unnecessary pass
+    "ISC003",   # non-implicit string concatenation
+    "C408",     # dict(x=y) instead of {'x': y}
+    "PLR09",    # Too many xxx
+    "PLR2004",  # magic number
+    "PLC0414",  # import x as x
+    "TRY003",   # Long exception message
+    ]
+
+
+[[tool.mypy.overrides]]
+module = [
+    "scipy",
+    "scipy.optimize",
+    "scipy.linalg",
+    "scipy.sparse",
+    "scipy.sparse.linalg",
+    "pyopencl",
+    "pyopencl.array",
+    "pyopencl.elementwise",
+    "pyopencl.reduction",
+    ]
+ignore_missing_imports = true
diff --git a/setup.py b/setup.py
deleted file mode 100644
index b7fd3cf..0000000
--- a/setup.py
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/usr/bin/env python3
-
-from setuptools import setup, find_packages
-import opencl_fdtd
-
-with open('README.md', 'r') as f:
-    long_description = f.read()
-
-setup(name='opencl_fdtd',
-      version=opencl_fdtd.version,
-      description='OpenCL FDTD solver',
-      long_description=long_description,
-      long_description_content_type='text/markdown',
-      author='Jan Petykiewicz',
-      author_email='anewusername@gmail.com',
-      url='https://mpxd.net/code/jan/opencl_fdtd',
-      packages=find_packages(),
-      package_data={
-          'opencl_fdfd': ['kernels/*']
-      },
-      install_requires=[
-            'numpy',
-            'pyopencl',
-            'jinja2',
-            'fdfd_tools>=0.3',
-      ],
-      extras_require={
-      },
-      )
-