From 1627d0da052f24f85838c3a70a9c1534692bc163 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Mon, 15 Jul 2019 00:11:09 -0700
Subject: [PATCH 01/36] Consolidate poynting code into update_h

---
 opencl_fdtd/kernels/update_h.cl | 56 +++++++++++++++++++++------------
 opencl_fdtd/simulation.py       | 12 ++-----
 2 files changed, 38 insertions(+), 30 deletions(-)

diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index 187a18a..a709486 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -6,13 +6,14 @@
  *   common_header: Rendered contents of common.cl
  *   pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
  *      axes, polarities, and thicknesses.
- *   do_poynting: Whether to precalculate poynting vector components (boolean)
+ *   do_poynting: Whether to calculate poynting vector (boolean)
+ *   do_poynting_halves: Whether to calculate half-step poynting vectors (boolean)
  *   uniform_dx: If grid is uniform, uniform_dx should be the grid spacing.
  *      Otherwise, uniform_dx should be False and [inv_de{xyz}] arrays must be supplied as
  *      OpenCL args.
  *
  *  OpenCL args:
- *   E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS]
+ *   E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [S], [S0, S1]
  */
 
 {{common_header}}
@@ -52,7 +53,7 @@ if ({{r}} == s{{r}} - 1) {
     dE{{u ~ r}} = bloch_re * dE{{u ~ r}} + bloch_im * (F{{u}}[i + p{{u}}] - F{{u}}[i]);
     dE{{v ~ r}} = bloch_re * dE{{v ~ r}} + bloch_im * (F{{v}}[i + p{{v}}] - F{{v}}[i]);
 }
-{% endfor -%}
+{%- endfor %}
 
 {%- if do_poynting %}
 
@@ -118,7 +119,7 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
 /*
  *  Update H
  */
-{% if do_poynting -%}
+{% if do_poynting or do_poynting_halves -%}
 // Save old H for averaging
 ftype Hx_old = Hx[i];
 ftype Hy_old = Hy[i];
@@ -131,25 +132,40 @@ Hy[i] -= dt * (dExz - dEzx - pHyi);
 Hz[i] -= dt * (dEyx - dExy - pHzi);
 
 {% if do_poynting -%}
+/*
+ *  Calculate unscaled S components at ??? locations
+ *  //TODO: document S locations and types
+ */
+__global ftype *Sx = S + XX;
+__global ftype *Sy = S + YY;
+__global ftype *Sz = S + ZZ;
+
 // Average H across timesteps
 ftype aHxt = Hx[i] + Hx_old;
 ftype aHyt = Hy[i] + Hy_old;
 ftype aHzt = Hz[i] + Hz_old;
 
-/*
- *  Calculate unscaled S components at H locations
- */
-__global ftype *oSxy = oS + 0 * field_size;
-__global ftype *oSyz = oS + 1 * field_size;
-__global ftype *oSzx = oS + 2 * field_size;
-__global ftype *oSxz = oS + 3 * field_size;
-__global ftype *oSyx = oS + 4 * field_size;
-__global ftype *oSzy = oS + 5 * field_size;
-
-oSxy[i] = aEyx * aHzt;
-oSxz[i] = -aEzx * aHyt;
-oSyz[i] = aEzy * aHxt;
-oSyx[i] = -aExy * aHzt;
-oSzx[i] = aExz * aHyt;
-oSzy[i] = -aEyz * aHxt;
+Sx[i] = Ey[i + px] * aHzt - Ez[i + px] * aHyt;
+Sy[i] = Ez[i + py] * aHxt - Ex[i + py] * aHzt;
+Sz[i] = Ex[i + pz] * aHyt - Ey[i + pz] * aHxt;
+{%- endif -%}
+
+{% if do_poynting_halves -%}
+/*
+ *  Calculate unscaled S components at ??? locations
+ *  //TODO: document S locations and types
+ */
+__global ftype *Sx0 = S0 + XX;
+__global ftype *Sy0 = S0 + YY;
+__global ftype *Sz0 = S0 + ZZ;
+__global ftype *Sx1 = S1 + XX;
+__global ftype *Sy1 = S1 + YY;
+__global ftype *Sz1 = S1 + ZZ;
+
+Sx0[i] = Ey[i + px] * Hz_old - Ez[i + px] * Hy_old;
+Sy0[i] = Ez[i + py] * Hx_old - Ex[i + py] * Hz_old;
+Sz0[i] = Ex[i + pz] * Hy_old - Ey[i + pz] * Hx_old;
+Sx1[i] = Ey[i + px] * Hz[i] - Ez[i + px] * Hy[i];
+Sy1[i] = Ez[i + py] * Hx[i] - Ex[i + py] * Hz[i];
+Sz1[i] = Ex[i + pz] * Hy[i] - Ey[i + pz] * Hx[i];
 {%- endif -%}
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 03b5588..5faea22 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -26,7 +26,7 @@ class Simulation(object):
     """
     Constructs and holds the basic FDTD operations and related fields
 
-    After constructing this object, call the (update_E, update_H, update_S) members
+    After constructing this object, call the (update_E, update_H) members
      to perform FDTD updates on the stored (E, H, S) fields:
 
         pmls = [{'axis': a, 'polarity': p} for a in 'xyz' for p in 'np']
@@ -43,8 +43,6 @@ class Simulation(object):
             # Perturb the field (i.e., add a soft current source)
             sim.E[ind] += numpy.sin(omega * t * sim.dt)
             event = sim.update_H([])
-            if sim.update_S:
-                event = sim.update_S([event])
             event.wait()
 
             with lzma.open('saved_simulation', 'wb') as f:
@@ -112,6 +110,7 @@ class Simulation(object):
         :param float_type: numpy.float32 or numpy.float64. Default numpy.float32.
         :param do_poynting: If true, enables calculation of the poynting vector, S.
                 Poynting vector calculation adds the following computational burdens:
+                    ****INACCURATE, TODO FIXME*****
                     * During update_H, ~6 extra additions/cell are performed in order to spatially
                         average E and temporally average H. These quantities are multiplied
                         (6 multiplications/cell) and then stored (6 writes/cell, cache-friendly).
@@ -226,12 +225,7 @@ class Simulation(object):
 
         S_fields = OrderedDict()
         if do_poynting:
-            S_source = jinja_env.get_template('update_s.cl').render(**jinja_args)
-            self.sources['S'] = S_source
-
-            self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=self.arg_type)
             self.S = pyopencl.array.zeros_like(self.E)
-            S_fields[ptr('oS')] = self.oS
             S_fields[ptr('S')] = self.S
 
         J_fields = OrderedDict()
@@ -257,8 +251,6 @@ class Simulation(object):
         '''
         self.update_E = self._create_operation(E_source, (base_fields, eps_field, pml_e_fields))
         self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields))
-        if do_poynting:
-            self.update_S = self._create_operation(S_source, (base_fields, S_fields))
         if bloch_boundaries:
             self.update_F = self._create_operation(F_source, (bloch_fields, eps_field, pml_f_fields))
             self.update_G = self._create_operation(G_source, (bloch_fields, pml_g_fields))

From 89d99187b6153c21229a1cf00e12b17100ec67be Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Wed, 17 Jul 2019 00:52:31 -0700
Subject: [PATCH 02/36] Remove unused code

---
 opencl_fdtd/kernels/update_h.cl | 17 ----------------
 opencl_fdtd/kernels/update_s.cl | 36 ---------------------------------
 2 files changed, 53 deletions(-)
 delete mode 100644 opencl_fdtd/kernels/update_s.cl

diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index a709486..a8a6b04 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -55,23 +55,6 @@ if ({{r}} == s{{r}} - 1) {
 }
 {%- endfor %}
 
-{%- if do_poynting %}
-
-
-/*
- *  Precalculate averaged E
- */
-ftype aExy = Ex[i + py] + Ex[i];
-ftype aExz = Ex[i + pz] + Ex[i];
-
-ftype aEyx = Ey[i + px] + Ey[i];
-ftype aEyz = Ey[i + pz] + Ey[i];
-
-ftype aEzx = Ez[i + px] + Ez[i];
-ftype aEzy = Ez[i + py] + Ez[i];
-{%- endif %}
-
-
 
 
 /*
diff --git a/opencl_fdtd/kernels/update_s.cl b/opencl_fdtd/kernels/update_s.cl
deleted file mode 100644
index 94e061e..0000000
--- a/opencl_fdtd/kernels/update_s.cl
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- *  Update E-field, including any PMLs.
- *
- *  Template parameters:
- *   common_header: Rendered contents of common.cl
- *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
- *   pml_thickness: Number of cells (integer)
- *
- *  OpenCL args:
- *   E, H, dt, S, oS
- */
-
-{{common_header}}
-
-//////////////////////////////////////////////////////////////////////
-
-
-/*
- * Calculate S from oS (pre-calculated components)
- */
-__global ftype *Sx = S + XX;
-__global ftype *Sy = S + YY;
-__global ftype *Sz = S + ZZ;
-
-// Use unscaled S components from H locations
-__global ftype *oSxy = oS + 0 * field_size;
-__global ftype *oSyz = oS + 1 * field_size;
-__global ftype *oSzx = oS + 2 * field_size;
-__global ftype *oSxz = oS + 3 * field_size;
-__global ftype *oSyx = oS + 4 * field_size;
-__global ftype *oSzy = oS + 5 * field_size;
-
-ftype s_factor = dt * 0.125;
-Sx[i] = (oSxy[i] + oSxz[i] + oSxy[i + my] + oSxz[i + mz]) * s_factor;
-Sy[i] = (oSyz[i] + oSyx[i] + oSyz[i + mz] + oSyx[i + mx]) * s_factor;
-Sz[i] = (oSzx[i] + oSzy[i] + oSzx[i + mx] + oSzy[i + my]) * s_factor;

From cf0db63a3ff7b8d1068fc0b7b5e6b709b669278b Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Wed, 17 Jul 2019 00:53:55 -0700
Subject: [PATCH 03/36] Revert "Add restrict keyword to pointers (not sharing
 the same memory for multiple fields)"

This reverts commit 60b70bb332d29fb5a2aecb801517be659cf4daf9.

It appears to have minimal performance impact, and fails to compile on
nvidia cards.
---
 opencl_fdtd/simulation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 5faea22..2f919cd 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -169,7 +169,7 @@ class Simulation(object):
         ctype = type_to_C(self.arg_type)
 
         def ptr(arg: str) -> str:
-            return ctype + ' * restrict ' + arg
+            return ctype + ' *' + arg
 
         base_fields = OrderedDict()
         base_fields[ptr('E')] = self.E

From 23d65d6ebb7967bfb1219aec14147f609fbb5b20 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Tue, 30 Jul 2019 00:33:07 -0700
Subject: [PATCH 04/36] Add poynting halves

maybe will remove this later... dunno if it's actually useful
---
 opencl_fdtd/simulation.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 2f919cd..b67df26 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -83,6 +83,7 @@ class Simulation(object):
                  queue: pyopencl.CommandQueue = None,
                  float_type: numpy.float32 or numpy.float64 = numpy.float32,
                  do_poynting: bool = True,
+                 do_poynting_halves = False,
                  do_fieldsrc: bool = False):
         """
         Initialize the simulation.
@@ -119,6 +120,7 @@ class Simulation(object):
                         in complexity.
                     * GPU memory requirements are approximately doubled, since S and the intermediate
                         products must be stored.
+        :param do_poynting_halves: TODO DOCUMENT
         """
         if initial_fields is None:
             initial_fields = {}
@@ -202,6 +204,7 @@ class Simulation(object):
                 'common_header': common_source,
                 'pmls': pmls,
                 'do_poynting': do_poynting,
+                'do_poynting_halves': do_poynting_halves,
                 'bloch': bloch_boundaries,
                 'uniform_dx': uniform_dx,
                 }
@@ -227,6 +230,12 @@ class Simulation(object):
         if do_poynting:
             self.S = pyopencl.array.zeros_like(self.E)
             S_fields[ptr('S')] = self.S
+        if do_poynting_halves:
+            self.S0 = pyopencl.array.zeros_like(self.E)
+            self.S1 = pyopencl.array.zeros_like(self.E)
+            S_fields[ptr('S0')] = self.S0
+            S_fields[ptr('S1')] = self.S1
+
 
         J_fields = OrderedDict()
         if do_fieldsrc:

From 0c7b2fd3a2fbfc413cbefaab4f579401d2e49d8c Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Tue, 30 Jul 2019 00:33:35 -0700
Subject: [PATCH 05/36] add test_simulation

---
 opencl_fdtd/test_simulation.py | 368 +++++++++++++++++++++++++++++++++
 1 file changed, 368 insertions(+)
 create mode 100644 opencl_fdtd/test_simulation.py

diff --git a/opencl_fdtd/test_simulation.py b/opencl_fdtd/test_simulation.py
new file mode 100644
index 0000000..f2fafa2
--- /dev/null
+++ b/opencl_fdtd/test_simulation.py
@@ -0,0 +1,368 @@
+import unittest
+import numpy
+
+from opencl_fdtd import Simulation
+from fdfd_tools import fdtd
+
+
+class BasicTests():
+    def test_initial_fields(self):
+        # Make sure initial fields didn't change
+        e0 = self.es[0]
+        h0 = self.hs[0]
+        mask = self.src_mask
+
+        self.assertEqual(e0[mask], self.j_mag / self.epsilon[mask])
+        self.assertFalse(e0[~mask].any())
+        self.assertFalse(h0.any())
+
+
+    def test_initial_energy(self):
+        e0 = self.es[0]
+        h0 = self.hs[0]
+        h1 = self.hs[1]
+        mask = self.src_mask[1]
+        dxes = self.dxes if self.dxes is not None else tuple(tuple(numpy.ones(s) for s in e0.shape[1:]) for _ in range(2))
+        dV = numpy.prod(numpy.meshgrid(*dxes[0], indexing='ij'), axis=0)
+        u0 = self.j_mag * self.j_mag / self.epsilon[self.src_mask] * dV[mask]
+        args = {'dxes': self.dxes,
+                'epsilon': self.epsilon}
+
+        # Make sure initial energy and E dot J are correct
+        energy0 = fdtd.energy_estep(h0=h0, e1=e0, h2=self.hs[1], **args)
+        e_dot_j_0 = fdtd.delta_energy_j(j0=(e0 - 0) * self.epsilon, e1=e0, dxes=self.dxes)
+        self.assertTrue(numpy.allclose(energy0[mask], u0))
+        self.assertFalse(energy0[~mask].any(), msg='energy0: {}'.format(energy0))
+        self.assertTrue(numpy.allclose(e_dot_j_0[mask], u0))
+        self.assertFalse(e_dot_j_0[~mask].any(), msg='e_dot_j_0: {}'.format(e_dot_j_0))
+
+
+    def test_energy_conservation(self):
+        e0 = self.es[0]
+        u0 = fdtd.delta_energy_j(j0=(e0 - 0) * self.epsilon, e1=e0, dxes=self.dxes).sum()
+        args = {'dxes': self.dxes,
+                'epsilon': self.epsilon}
+
+        for ii in range(1, 8):
+            with self.subTest(i=ii):
+                u_hstep = fdtd.energy_hstep(e0=self.es[ii-1], h1=self.hs[ii], e2=self.es[ii], **args)
+                u_estep = fdtd.energy_estep(h0=self.hs[ii], e1=self.es[ii], h2=self.hs[ii + 1], **args)
+                self.assertTrue(numpy.allclose(u_hstep.sum(), u0), msg='u_hstep: {}\n{}'.format(u_hstep.sum(), numpy.rollaxis(u_hstep, -1)))
+                self.assertTrue(numpy.allclose(u_estep.sum(), u0), msg='u_estep: {}\n{}'.format(u_estep.sum(), numpy.rollaxis(u_estep, -1)))
+
+
+    def test_poynting(self):
+        for ii in range(1, 3):
+            with self.subTest(i=ii):
+                s = fdtd.poynting(e=self.es[ii], h=self.hs[ii+1] + self.hs[ii])
+                self.assertTrue(numpy.allclose(s, self.ss[ii], rtol=1e-4),
+                                msg='From ExH:\n{}\nFrom sim.S:\n{}'.format(numpy.rollaxis(s, -1),
+                                                                            numpy.rollaxis(self.ss[ii], -1)))
+
+
+    def test_poynting_divergence(self):
+        args = {'dxes': self.dxes,
+                'epsilon': self.epsilon}
+
+        dxes = self.dxes if self.dxes is not None else tuple(tuple(numpy.ones(s) for s in self.epsilon.shape[1:]) for _ in range(2))
+        dV = numpy.prod(numpy.meshgrid(*dxes[0], indexing='ij'), axis=0)
+
+        u_eprev = None
+        for ii in range(1, 8):
+            with self.subTest(i=ii):
+                u_hstep = fdtd.energy_hstep(e0=self.es[ii-1], h1=self.hs[ii], e2=self.es[ii], **args)
+                u_estep = fdtd.energy_estep(h0=self.hs[ii], e1=self.es[ii], h2=self.hs[ii + 1], **args)
+
+                du_half_h2e = u_estep - u_hstep
+                div_s_h2e = self.dt * fdtd.poynting_divergence(e=self.es[ii], h=self.hs[ii], dxes=self.dxes) * dV
+                self.assertTrue(numpy.allclose(du_half_h2e, -div_s_h2e, rtol=1e-4),
+                                msg='du_half_h2e\n{}\ndiv_s_h2e\n{}'.format(numpy.rollaxis(du_half_h2e, -1),
+                                                                           -numpy.rollaxis(div_s_h2e, -1)))
+
+                if u_eprev is None:
+                    u_eprev = u_estep
+                    continue
+
+                # previous half-step
+                du_half_e2h = u_hstep - u_eprev
+                div_s_e2h = self.dt * fdtd.poynting_divergence(e=self.es[ii-1], h=self.hs[ii], dxes=self.dxes) * dV
+                self.assertTrue(numpy.allclose(du_half_e2h, -div_s_e2h, rtol=1e-4),
+                                msg='du_half_e2h\n{}\ndiv_s_e2h\n{}'.format(numpy.rollaxis(du_half_e2h, -1),
+                                                                           -numpy.rollaxis(div_s_e2h, -1)))
+                u_eprev = u_estep
+
+
+    def test_poynting_planes(self):
+        args = {'dxes': self.dxes,
+                'epsilon': self.epsilon}
+        dxes = self.dxes if self.dxes is not None else tuple(tuple(numpy.ones(s) for s in self.epsilon.shape[1:]) for _ in range(2))
+        dV = numpy.prod(numpy.meshgrid(*dxes[0], indexing='ij'), axis=0)
+
+        mx = numpy.roll(self.src_mask, (-1, -1), axis=(0, 1))
+        my = numpy.roll(self.src_mask, -1, axis=2)
+        mz = numpy.roll(self.src_mask, (+1, -1), axis=(0, 3))
+        px = numpy.roll(self.src_mask, -1, axis=0)
+        py = self.src_mask.copy()
+        pz = numpy.roll(self.src_mask, +1, axis=0)
+
+        u_eprev = None
+        for ii in range(1, 8):
+            with self.subTest(i=ii):
+                u_hstep = fdtd.energy_hstep(e0=self.es[ii-1], h1=self.hs[ii], e2=self.es[ii], **args)
+                u_estep = fdtd.energy_estep(h0=self.hs[ii], e1=self.es[ii], h2=self.hs[ii + 1], **args)
+
+                s_h2e = -fdtd.poynting(e=self.es[ii], h=self.hs[ii]) * self.dt
+                s_h2e[0] *= dxes[0][1][None, :, None] * dxes[0][2][None, None, :]
+                s_h2e[1] *= dxes[0][0][:, None, None] * dxes[0][2][None, None, :]
+                s_h2e[2] *= dxes[0][0][:, None, None] * dxes[0][1][None, :, None]
+                planes = [s_h2e[px].sum(), -s_h2e[mx].sum(),
+                          s_h2e[py].sum(), -s_h2e[my].sum(),
+                          s_h2e[pz].sum(), -s_h2e[mz].sum()]
+                self.assertTrue(numpy.allclose(sum(planes), (u_estep - u_hstep)[self.src_mask[1]]),
+                                msg='planes: {} (sum: {})\n du:\n {}'.format(planes, sum(planes), (u_estep - u_hstep)[self.src_mask[1]]))
+
+                if u_eprev is None:
+                    u_eprev = u_estep
+                    continue
+
+                s_e2h = -fdtd.poynting(e=self.es[ii - 1], h=self.hs[ii]) * self.dt
+                s_e2h[0] *= dxes[0][1][None, :, None] * dxes[0][2][None, None, :]
+                s_e2h[1] *= dxes[0][0][:, None, None] * dxes[0][2][None, None, :]
+                s_e2h[2] *= dxes[0][0][:, None, None] * dxes[0][1][None, :, None]
+                planes = [s_e2h[px].sum(), -s_e2h[mx].sum(),
+                          s_e2h[py].sum(), -s_e2h[my].sum(),
+                          s_e2h[pz].sum(), -s_e2h[mz].sum()]
+                self.assertTrue(numpy.allclose(sum(planes), (u_hstep - u_eprev)[self.src_mask[1]]),
+                                msg='planes: {} (sum: {})\n du:\n {}'.format(planes, sum(planes), (u_hstep - u_eprev)[self.src_mask[1]]))
+
+                # previous half-step
+                u_eprev = u_estep
+
+
+
+
+class Basic2DNoDXOnlyVacuum(unittest.TestCase, BasicTests):
+    def setUp(self):
+        shape = [3, 5, 5, 1]
+        self.dt = 0.5
+        self.epsilon = numpy.ones(shape, dtype=float)
+        self.j_mag = 32
+        self.dxes = None
+
+        self.src_mask = numpy.zeros_like(self.epsilon, dtype=bool)
+        self.src_mask[1, 2, 2, 0] = True
+
+        e = numpy.zeros_like(self.epsilon)
+        h = numpy.zeros_like(self.epsilon)
+        e[self.src_mask] = self.j_mag / self.epsilon[self.src_mask]
+        self.es = [e]
+        self.hs = [h]
+        self.ss = []
+
+        sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
+                         initial_fields={'E': e, 'H': h}, do_poynting=True)
+
+        for _ in range(9):
+            e = e.copy()
+            h = h.copy()
+            sim.update_H([]).wait()
+            sim.update_E([]).wait()
+            self.es.append(sim.E.get().reshape(shape))
+            self.hs.append(sim.H.get().reshape(shape))
+            self.ss.append(sim.S.get().reshape(shape))
+
+
+class Basic2DUniformDX3(unittest.TestCase, BasicTests):
+    def setUp(self):
+        shape = [3, 5, 1, 5]
+        self.dt = 0.5
+        self.j_mag = 32
+        self.dxes = tuple(tuple(numpy.full(s, 2.0) for s in shape[1:]) for _ in range(2))
+
+        self.src_mask = numpy.zeros(shape, dtype=bool)
+        self.src_mask[1, 2, 0, 2] = True
+
+        self.epsilon = numpy.full(shape, 1, dtype=float)
+        self.epsilon[self.src_mask] = 2
+
+        e = numpy.zeros_like(self.epsilon)
+        h = numpy.zeros_like(self.epsilon)
+        e[self.src_mask] = self.j_mag / self.epsilon[self.src_mask]
+        self.es = [e]
+        self.hs = [h]
+        self.ss = []
+
+        sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
+                         initial_fields={'E': e, 'H': h}, do_poynting=True)
+
+        for _ in range(9):
+            e = e.copy()
+            h = h.copy()
+            sim.update_H([]).wait()
+            sim.update_E([]).wait()
+            self.es.append(sim.E.get().reshape(shape))
+            self.hs.append(sim.H.get().reshape(shape))
+            self.ss.append(sim.S.get().reshape(shape))
+
+
+class Basic3DUniformDXOnlyVacuum(unittest.TestCase, BasicTests):
+    def setUp(self):
+        shape = [3, 5, 5, 5]
+        self.dt = 0.5
+        self.epsilon = numpy.ones(shape, dtype=float)
+        self.j_mag = 32
+        self.dxes = tuple(tuple(numpy.ones(s) for s in shape[1:]) for _ in range(2))
+
+        self.src_mask = numpy.zeros_like(self.epsilon, dtype=bool)
+        self.src_mask[1, 2, 2, 2] = True
+
+        e = numpy.zeros_like(self.epsilon)
+        h = numpy.zeros_like(self.epsilon)
+        e[self.src_mask] = self.j_mag / self.epsilon[self.src_mask]
+        self.es = [e]
+        self.hs = [h]
+        self.ss = []
+
+        sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
+                         initial_fields={'E': e, 'H': h}, do_poynting=True)
+
+        for _ in range(9):
+            e = e.copy()
+            h = h.copy()
+            sim.update_H([]).wait()
+            sim.update_E([]).wait()
+            self.es.append(sim.E.get().reshape(shape))
+            self.hs.append(sim.H.get().reshape(shape))
+            self.ss.append(sim.S.get().reshape(shape))
+
+
+class Basic3DUniformDXUniformN(unittest.TestCase, BasicTests):
+    def setUp(self):
+        shape = [3, 5, 5, 5]
+        self.dt = 0.5
+        self.epsilon = numpy.full(shape, 2.5, dtype=float)
+        self.j_mag = 32
+        self.dxes = tuple(tuple(numpy.ones(s) for s in shape[1:]) for _ in range(2))
+
+        self.src_mask = numpy.zeros_like(self.epsilon, dtype=bool)
+        self.src_mask[1, 2, 2, 2] = True
+
+        e = numpy.zeros_like(self.epsilon)
+        h = numpy.zeros_like(self.epsilon)
+        e[self.src_mask] = self.j_mag / self.epsilon[self.src_mask]
+        self.es = [e]
+        self.hs = [h]
+        self.ss = []
+
+        sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
+                         initial_fields={'E': e, 'H': h}, do_poynting=True)
+
+        for _ in range(9):
+            e = e.copy()
+            h = h.copy()
+            sim.update_H([]).wait()
+            sim.update_E([]).wait()
+            self.es.append(sim.E.get().reshape(shape))
+            self.hs.append(sim.H.get().reshape(shape))
+            self.ss.append(sim.S.get().reshape(shape))
+
+
+class Basic3DUniformDX(unittest.TestCase, BasicTests):
+    def setUp(self):
+        shape = [3, 5, 5, 5]
+        self.dt = 0.33
+        self.j_mag = 32
+        self.dxes = tuple(tuple(numpy.ones(s) for s in shape[1:]) for _ in range(2))
+
+        self.src_mask = numpy.zeros(shape, dtype=bool)
+        self.src_mask[1, 2, 2, 2] = True
+
+        self.epsilon = numpy.full(shape, 1, dtype=float)
+        self.epsilon[self.src_mask] = 2
+
+        e = numpy.zeros_like(self.epsilon)
+        h = numpy.zeros_like(self.epsilon)
+        e[self.src_mask] = self.j_mag / self.epsilon[self.src_mask]
+        self.es = [e]
+        self.hs = [h]
+        self.ss = []
+
+        sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
+                         initial_fields={'E': e, 'H': h}, do_poynting=True)
+
+        for _ in range(9):
+            e = e.copy()
+            h = h.copy()
+            sim.update_H([]).wait()
+            sim.update_E([]).wait()
+            self.es.append(sim.E.get().reshape(shape))
+            self.hs.append(sim.H.get().reshape(shape))
+            self.ss.append(sim.S.get().reshape(shape))
+
+
+class JdotE_3DUniformDX(unittest.TestCase):
+    def setUp(self):
+        shape = [3, 5, 5, 5]
+        self.dt = 0.5
+        self.j_mag = 32
+        self.dxes = tuple(tuple(numpy.full(s, 2.0) for s in shape[1:]) for _ in range(2))
+
+        self.src_mask = numpy.zeros(shape, dtype=bool)
+        self.src_mask[1, 2, 2, 2] = True
+
+        self.epsilon = numpy.full(shape, 4, dtype=float)
+        self.epsilon[self.src_mask] = 2
+
+        e = numpy.random.randint(-128, 128 + 1, size=shape).astype(float)
+        h = numpy.random.randint(-128, 128 + 1, size=shape).astype(float)
+        self.es = [e]
+        self.hs = [h]
+        self.ss = []
+
+        sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
+                         initial_fields={'E': e, 'H': h}, do_poynting=True, float_type=numpy.float64)
+
+        eh2h = fdtd.maxwell_h(dt=self.dt, dxes=self.dxes)
+        eh2e = fdtd.maxwell_e(dt=self.dt, dxes=self.dxes)
+        for ii in range(9):
+            e = e.copy()
+            h = h.copy()
+            sim.update_H([]).wait()
+            sim.update_E([]).wait()
+
+            if ii == 1:
+                nn = numpy.where(self.src_mask.flat)[0][0]
+                self.e_before = sim.E.get().reshape(shape)
+                sim.E[nn] += self.j_mag / self.epsilon[self.src_mask][0]
+                self.e_after = sim.E.get().reshape(shape)
+                self.j_dot_e = self.j_mag * e[self.src_mask]
+
+            self.hs.append(sim.H.get().reshape(shape))
+            self.es.append(sim.E.get().reshape(shape))
+            self.ss.append(sim.S.get().reshape(shape))
+
+
+    def test_j_dot_e(self):
+        h0 = self.hs[2]
+        e0 = self.es[1]
+        e1 = self.es[2]
+        j0 = numpy.zeros_like(e0)
+        j0[self.src_mask] = self.j_mag
+        args = {'dxes': self.dxes,
+                'epsilon': self.epsilon}
+        e2h = fdtd.maxwell_h(dt=self.dt, dxes=self.dxes)
+
+        #ee = j0 * (2 * e0 - j0)
+        #hh = h0 * e2h(j0, numpy.zeros_like(h0))
+        #u0 = fdtd.dxmul(ee, hh, **args)
+        u0 = fdtd.delta_energy_j(j0=j0, e1=(e0 + e1), dxes=self.dxes)
+
+        #uh = [fdtd.energy_hstep(e0=self.es[ii-1], h1=self.hs[ii], e2=self.es[ii], **args) for ii in range(1, 9)]
+        #ue = [fdtd.energy_estep(h0=self.hs[ii], e1=self.es[ii], h2=self.hs[ii + 1], **args) for ii in range(8)]
+        #uht = [uu.sum() for uu in uh]
+        #uet = [uu.sum() for uu in ue]
+
+        u_hstep = fdtd.energy_hstep(e0=self.es[0], h1=self.hs[1], e2=self.es[1], **args)
+        u_estep = fdtd.energy_estep(h0=self.hs[-2], e1=self.es[-2], h2=self.hs[-1], **args)
+        #breakpoint()
+        self.assertTrue(numpy.allclose(u0.sum(), (u_estep - u_hstep).sum()), msg='{} != {}'.format(u0.sum(), (u_estep - u_hstep).sum()))

From a4dd03166651f2ceea81416ac9e143effe91f952 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Fri, 30 Aug 2019 22:13:26 -0700
Subject: [PATCH 06/36] ongoing

---
 fdtd.py                   | 111 ++++++++++++++++++++++++++++----------
 opencl_fdtd/simulation.py |  17 ++++++
 2 files changed, 100 insertions(+), 28 deletions(-)

diff --git a/fdtd.py b/fdtd.py
index 99af07b..da47177 100644
--- a/fdtd.py
+++ b/fdtd.py
@@ -7,6 +7,7 @@ See main() for simulation setup.
 import sys
 import time
 import logging
+import pyopencl
 
 import numpy
 import lzma
@@ -82,7 +83,7 @@ def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern:
 
 
 def main():
-    max_t = 8000            # number of timesteps
+    max_t = 4000            # number of timesteps
 
     dx = 25                 # discretization (nm/cell)
     pml_thickness = 8       # (number of cells)
@@ -101,33 +102,37 @@ def main():
     n_air = 1.0   # air
 
     # Half-dimensions of the simulation grid
-    xy_max = (xy_size + 1) * a * [1, numpy.sqrt(3)/2]
-    z_max = 1.6 * a
-    xyz_max = numpy.hstack((xy_max, z_max)) + pml_thickness * dx
-
-    # Coordinates of the edges of the cells.
-    #  The fdtd package can only do square grids at the moment.
-    half_edge_coords = [numpy.arange(dx/2, m + dx, step=dx) for m in xyz_max]
-    edge_coords = [numpy.hstack((-h[::-1], h)) for h in half_edge_coords]
+#    xy_max = (xy_size + 1) * a * [1, numpy.sqrt(3)/2]
+#    z_max = 1.6 * a
+#    xyz_max = numpy.hstack((xy_max, z_max)) + pml_thickness * dx
+#
+#    # Coordinates of the edges of the cells.
+#    #  The fdtd package can only do square grids at the moment.
+#    half_edge_coords = [numpy.arange(dx/2, m + dx, step=dx) for m in xyz_max]
+#    edge_coords = [numpy.hstack((-h[::-1], h)) for h in half_edge_coords]
+    edge_coords = [numpy.arange(-100.5, 101), numpy.arange(-1, 1), numpy.arange(-100.5, 101)]
+#    edge_coords = [numpy.arange(-100.5, 101), numpy.arange(-100.5, 101), numpy.arange(-1, 1)]
 
     # #### Create the grid, mask, and draw the device ####
     grid = gridlock.Grid(edge_coords, initial=n_air**2, num_grids=3)
-    grid.draw_slab(surface_normal=gridlock.Direction.z,
-                   center=[0, 0, 0],
-                   thickness=th,
-                   eps=n_slab**2)
-    mask = perturbed_l3(a, r)
-
-    grid.draw_polygons(surface_normal=gridlock.Direction.z,
-                       center=[0, 0, 0],
-                       thickness=2 * th,
-                       eps=n_air**2,
-                       polygons=mask.as_polygons())
+#    grid.draw_slab(surface_normal=gridlock.Direction.z,
+#                   center=[0, 0, 0],
+#                   thickness=th,
+#                   eps=n_slab**2)
+#    mask = perturbed_l3(a, r)
+#
+#    grid.draw_polygons(surface_normal=gridlock.Direction.z,
+#                       center=[0, 0, 0],
+#                       thickness=2 * th,
+#                       eps=n_air**2,
+#                       polygons=mask.as_polygons())
 
     logger.info('grid shape: {}'.format(grid.shape))
     # #### Create the simulation grid ####
+#    pmls = [{'axis': a, 'polarity': p, 'thickness': pml_thickness}
+#            for a in 'xyz' for p in 'np']
     pmls = [{'axis': a, 'polarity': p, 'thickness': pml_thickness}
-            for a in 'xyz' for p in 'np']
+            for a in 'xz' for p in 'np']
     #bloch = [{'axis': a, 'real': 1, 'imag': 0} for a in 'x']
     bloch = []
     sim = Simulation(grid.grids, do_poynting=True, pmls=pmls, bloch_boundaries=bloch)
@@ -155,25 +160,68 @@ def main():
             f.write('\n=====================G=====================\n')
             f.write(sim.sources['G'])
 
+
+    planes = numpy.empty((max_t, 4))
+    planes2 = numpy.empty((max_t, 4))
+    Ectr = numpy.empty(max_t)
+    u = numpy.empty(max_t)
+    ui = numpy.empty(max_t)
     # #### Run a bunch of iterations ####
     # event = sim.whatever([prev_event]) indicates that sim.whatever should be queued
     #  immediately and run once prev_event is finished.
     start = time.perf_counter()
     for t in range(max_t):
         e = sim.update_E([])
-        if bloch:
-            e = sim.update_F([e])
+#        if bloch:
+#            e = sim.update_F([e])
         e.wait()
 
         ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + numpy.prod(grid.shape)
-        sim.E[ind] += field_source(t)
+#        sim.E[ind] += field_source(t)
+        if t == 2:
+            sim.E[ind] += 1e6
+
+        h_old = sim.H.copy()
+
         e = sim.update_H([])
-        if bloch:
-            e = sim.update_G([e])
-        if sim.update_S:
-            e = sim.update_S([e])
+#        if bloch:
+#            e = sim.update_G([e])
         e.wait()
 
+        S = sim.S.get().reshape(grid.grids.shape) * sim.dt * dx * dx *dx
+        m = 30
+        planes[t] = (
+            S[0][+pml_thickness+2, :, pml_thickness+3:-pml_thickness-3].sum(),
+            S[0][-pml_thickness-2, :, pml_thickness+3:-pml_thickness-3].sum(),
+            S[2][pml_thickness+2:-pml_thickness-2, :, +pml_thickness+2].sum(),
+            S[2][pml_thickness+2:-pml_thickness-2, :, -pml_thickness-2].sum(),
+            )
+        planes2[t] = (
+            S[0][grid.shape[0]//2-1, 0, grid.shape[2]//2].sum(),
+            S[0][grid.shape[0]//2  , 0, grid.shape[2]//2].sum(),
+            S[2][grid.shape[0]//2  , 0, grid.shape[2]//2-1].sum(),
+            S[2][grid.shape[0]//2  , 0, grid.shape[2]//2].sum(),
+            )
+
+#        planes[t] = (
+#            S[0][+pml_thickness+m, pml_thickness+m+1:-pml_thickness-m, :].sum(),
+#            S[0][-pml_thickness-m, pml_thickness+m+1:-pml_thickness-m, :].sum(),
+#            S[1][pml_thickness+1+m:-pml_thickness-m, +pml_thickness+m, :].sum(),
+#            S[1][pml_thickness+1+m:-pml_thickness-m, -pml_thickness-m, :].sum(),
+#            )
+#        planes2[t] = (
+#            S[0][grid.shape[0]//2-1, grid.shape[1]//2  , 0].sum(),
+#            S[0][grid.shape[0]//2  , grid.shape[1]//2  , 0].sum(),
+#            S[1][grid.shape[0]//2  , grid.shape[1]//2-1, 0].sum(),
+#            S[1][grid.shape[0]//2  , grid.shape[1]//2  , 0].sum(),
+#            )
+        Ectr[t] = sim.E[ind].get()
+        u[t] = pyopencl.array.sum(sim.E * sim.E * sim.eps + h_old * sim.H).get() * dx * dx * dx
+        ui[t] = (sim.E * sim.E * sim.eps + h_old * sim.H).reshape(grid.grids.shape).get()[:, pml_thickness+m:-pml_thickness-m, :,
+                                                                                             pml_thickness+m:-pml_thickness-m].sum() * dx * dx * dx
+#        ui[t] = (sim.E * sim.E * sim.eps + h_old * sim.H).reshape(grid.grids.shape).get()[:, pml_thickness+m:-pml_thickness-m,
+#                                                                                             pml_thickness+m:-pml_thickness-m, :].sum() * dx * dx * dx
+
         if t % 100 == 0:
             logger.info('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start)))
             sys.stdout.flush()
@@ -185,6 +233,13 @@ def main():
             'grid': grid,
             'E': unvec(sim.E.get()),
             'H': unvec(sim.H.get()),
+            'dt': sim.dt,
+            'dx': dx,
+            'planes': planes,
+            'planes2': planes2,
+            'Ectr': Ectr,
+            'u': u,
+            'ui': ui,
             }
         if sim.S is not None:
             d['S'] = unvec(sim.S.get())
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index b67df26..cfd4a0a 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -375,3 +375,20 @@ def type_to_C(float_type) -> str:
     else:
         raise Exception('Unsupported type')
     return arg_type
+
+#            def par(x):
+#                scaling = ((x / (pml['thickness'])) ** pml['m'])
+#                print('scaling', scaling)
+#                print('sigma_max * dt / 2', sigma_max * self.dt / 2)
+#                print('kappa_max', kappa_max)
+#                print('m', pml['m'])
+#                sigma = scaling * sigma_max
+#                kappa = 1 + scaling * (kappa_max - 1)
+#                alpha = ((1 - x / pml['thickness']) ** pml['ma']) * alpha_max
+#                p0 = 1/(1 + self.dt * (alpha + sigma / kappa))
+#                p1 = self.dt * sigma / kappa * p0
+#                p2 = 1/kappa
+#                print(p0.min(), p0.max(), p1.min(), p1.max())
+#                return p0, p1, p2
+#
+#

From 4e0bd8b3c61a2d7de30b95735ad81b0a4b28517f Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Fri, 16 Oct 2020 19:32:22 -0700
Subject: [PATCH 07/36] switch to meanas

---
 fdtd.py                   | 4 ++--
 opencl_fdtd/simulation.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fdtd.py b/fdtd.py
index da47177..37f70f9 100644
--- a/fdtd.py
+++ b/fdtd.py
@@ -17,7 +17,7 @@ from opencl_fdtd import Simulation
 from masque import Pattern, shapes
 import gridlock
 import pcgen
-import fdfd_tools
+import meanas
 
 
 __author__ = 'Jan Petykiewicz'
@@ -228,7 +228,7 @@ def main():
 
     with lzma.open('saved_simulation', 'wb') as f:
         def unvec(f):
-            return fdfd_tools.unvec(f, grid.shape)
+            return meanas.unvec(f, grid.shape)
         d = {
             'grid': grid,
             'E': unvec(sim.E.get()),
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index cfd4a0a..b708411 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -12,7 +12,7 @@ import pyopencl
 import pyopencl.array
 from pyopencl.elementwise import ElementwiseKernel
 
-from fdfd_tools import vec
+from meanas import vec
 
 
 __author__ = 'Jan Petykiewicz'

From 8cbb0e9864e260dd80ce23c2745c7e643dcb5f29 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Fri, 16 Oct 2020 19:32:35 -0700
Subject: [PATCH 08/36] call update_s during loop

---
 opencl_fdtd/simulation.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index b708411..5f52e35 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -26,7 +26,7 @@ class Simulation(object):
     """
     Constructs and holds the basic FDTD operations and related fields
 
-    After constructing this object, call the (update_E, update_H) members
+    After constructing this object, call the (update_E, update_H, update_S) members
      to perform FDTD updates on the stored (E, H, S) fields:
 
         pmls = [{'axis': a, 'polarity': p} for a in 'xyz' for p in 'np']
@@ -43,6 +43,8 @@ class Simulation(object):
             # Perturb the field (i.e., add a soft current source)
             sim.E[ind] += numpy.sin(omega * t * sim.dt)
             event = sim.update_H([])
+            if sim.update_S:
+                event = sim.update_S([event])
             event.wait()
 
             with lzma.open('saved_simulation', 'wb') as f:

From 8afe1d1f266ac1e112bf20c4c9899be1ad3d89dd Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Fri, 16 Oct 2020 19:32:53 -0700
Subject: [PATCH 09/36] style fixes per flake8

---
 .flake8                   | 29 +++++++++++++++++++++++++++++
 opencl_fdtd/simulation.py | 29 +++++++++++++----------------
 2 files changed, 42 insertions(+), 16 deletions(-)
 create mode 100644 .flake8

diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..0042015
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,29 @@
+[flake8]
+ignore =
+    # E501 line too long
+    E501,
+    # W391 newlines at EOF
+    W391,
+    # E241 multiple spaces after comma
+    E241,
+    # E302 expected 2 newlines
+    E302,
+    # W503 line break before binary operator (to be deprecated)
+    W503,
+    # E265 block comment should start with '# '
+    E265,
+    # E123 closing bracket does not match indentation of opening bracket's line
+    E123,
+    # E124 closing bracket does not match visual indentation
+    E124,
+    # E221 multiple spaces before operator
+    E221,
+    # E201 whitespace after '['
+    E201,
+    # E741 ambiguous variable name 'I'
+    E741,
+
+
+per-file-ignores =
+    # F401 import without use
+    */__init__.py: F401,
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 5f52e35..70db2a9 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -85,7 +85,7 @@ class Simulation(object):
                  queue: pyopencl.CommandQueue = None,
                  float_type: numpy.float32 or numpy.float64 = numpy.float32,
                  do_poynting: bool = True,
-                 do_poynting_halves = False,
+                 do_poynting_halves: bool = False,
                  do_fieldsrc: bool = False):
         """
         Initialize the simulation.
@@ -179,7 +179,7 @@ class Simulation(object):
         base_fields[ptr('E')] = self.E
         base_fields[ptr('H')] = self.H
         base_fields[ctype + ' dt'] = self.dt
-        if uniform_dx == False:
+        if uniform_dx is False:
             inv_dx_names = ['inv_d' + eh + r for eh in 'eh' for r in 'xyz']
             for name, field in zip(inv_dx_names, self.inv_dxes):
                 base_fields[ptr(name)] = field
@@ -199,17 +199,17 @@ class Simulation(object):
             bloch_fields[ptr('G')] = self.H
 
         common_source = jinja_env.get_template('common.cl').render(
-                ftype=ctype,
-                shape=self.shape,
-                )
+            ftype=ctype,
+            shape=self.shape,
+            )
         jinja_args = {
-                'common_header': common_source,
-                'pmls': pmls,
-                'do_poynting': do_poynting,
-                'do_poynting_halves': do_poynting_halves,
-                'bloch': bloch_boundaries,
-                'uniform_dx': uniform_dx,
-                }
+            'common_header': common_source,
+            'pmls': pmls,
+            'do_poynting': do_poynting,
+            'do_poynting_halves': do_poynting_halves,
+            'bloch': bloch_boundaries,
+            'uniform_dx': uniform_dx,
+            }
         E_source = jinja_env.get_template('update_e.cl').render(**jinja_args)
         H_source = jinja_env.get_template('update_h.cl').render(**jinja_args)
         self.sources['E'] = E_source
@@ -227,7 +227,6 @@ class Simulation(object):
             self.sources['F'] = F_source
             self.sources['G'] = G_source
 
-
         S_fields = OrderedDict()
         if do_poynting:
             self.S = pyopencl.array.zeros_like(self.E)
@@ -238,7 +237,6 @@ class Simulation(object):
             S_fields[ptr('S0')] = self.S0
             S_fields[ptr('S1')] = self.S1
 
-
         J_fields = OrderedDict()
         if do_fieldsrc:
             J_source = jinja_env.get_template('update_j.cl').render(**jinja_args)
@@ -249,7 +247,6 @@ class Simulation(object):
             J_fields[ptr('Jr')] = self.Jr
             J_fields[ptr('Ji')] = self.Ji
 
-
         '''
         PML
         '''
@@ -273,9 +270,9 @@ class Simulation(object):
                                        arguments=', '.join(list(args.keys()) + var_args))
             self.update_J = lambda e, *a: update(*args.values(), *a, wait_for=e)
 
-
     def _create_pmls(self, pmls):
         ctype = type_to_C(self.arg_type)
+
         def ptr(arg: str) -> str:
             return ctype + ' *' + arg
 

From 45e047c50e4b7a67a7d4a6fbec50e62b456788dc Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 11 Jul 2021 17:04:18 -0700
Subject: [PATCH 10/36] Move to VERSION.py approach for version

---
 opencl_fdtd/VERSION.py  | 4 ++++
 opencl_fdtd/__init__.py | 3 ++-
 setup.py                | 6 ++++--
 3 files changed, 10 insertions(+), 3 deletions(-)
 create mode 100644 opencl_fdtd/VERSION.py

diff --git a/opencl_fdtd/VERSION.py b/opencl_fdtd/VERSION.py
new file mode 100644
index 0000000..202c26a
--- /dev/null
+++ b/opencl_fdtd/VERSION.py
@@ -0,0 +1,4 @@
+""" VERSION defintion. THIS FILE IS MANUALLY PARSED BY setup.py and REQUIRES A SPECIFIC FORMAT """
+__version__ = '''
+0.4
+'''.strip()
diff --git a/opencl_fdtd/__init__.py b/opencl_fdtd/__init__.py
index b621c19..9bdf6e0 100644
--- a/opencl_fdtd/__init__.py
+++ b/opencl_fdtd/__init__.py
@@ -2,4 +2,5 @@ from .simulation import Simulation, type_to_C
 
 __author__ = 'Jan Petykiewicz'
 
-version = '0.4'
+from .VERSION import __version__
+version = __version__
diff --git a/setup.py b/setup.py
index b7fd3cf..79115a4 100644
--- a/setup.py
+++ b/setup.py
@@ -1,13 +1,15 @@
 #!/usr/bin/env python3
 
 from setuptools import setup, find_packages
-import opencl_fdtd
 
 with open('README.md', 'r') as f:
     long_description = f.read()
 
+with open('opencl_fdtd/VERSION.py', 'rt') as f:
+    version = f.readlines()[2].strip()
+
 setup(name='opencl_fdtd',
-      version=opencl_fdtd.version,
+      version=version,
       description='OpenCL FDTD solver',
       long_description=long_description,
       long_description_content_type='text/markdown',

From da3216133aa8b321c491cf9d9edbd1f7d2597b66 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 11 Jul 2021 17:05:02 -0700
Subject: [PATCH 11/36] clarify that just a plain sum is used (rather than
 mean)

---
 opencl_fdtd/kernels/update_h.cl | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index a8a6b04..8fd6a5a 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -123,14 +123,14 @@ __global ftype *Sx = S + XX;
 __global ftype *Sy = S + YY;
 __global ftype *Sz = S + ZZ;
 
-// Average H across timesteps
-ftype aHxt = Hx[i] + Hx_old;
-ftype aHyt = Hy[i] + Hy_old;
-ftype aHzt = Hz[i] + Hz_old;
+// Sum old and new H
+ftype sHxt = Hx[i] + Hx_old;
+ftype sHyt = Hy[i] + Hy_old;
+ftype sHzt = Hz[i] + Hz_old;
 
-Sx[i] = Ey[i + px] * aHzt - Ez[i + px] * aHyt;
-Sy[i] = Ez[i + py] * aHxt - Ex[i + py] * aHzt;
-Sz[i] = Ex[i + pz] * aHyt - Ey[i + pz] * aHxt;
+Sx[i] = Ey[i + px] * sHzt - Ez[i + px] * sHyt;
+Sy[i] = Ez[i + py] * sHxt - Ex[i + py] * sHzt;
+Sz[i] = Ex[i + pz] * sHyt - Ey[i + pz] * sHxt;
 {%- endif -%}
 
 {% if do_poynting_halves -%}

From 922161c0e641ff1ea4273fbfc8410686ec69b55a Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 11 Jul 2021 17:05:17 -0700
Subject: [PATCH 12/36] improve type annotation

---
 opencl_fdtd/simulation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 70db2a9..4fa4bae 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -357,7 +357,7 @@ class Simulation(object):
             return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type))
 
 
-def type_to_C(float_type) -> str:
+def type_to_C(float_type: numpy.dtype) -> str:
     """
     Returns a string corresponding to the C equivalent of a numpy type.
     Only works for float16, float32, float64.

From 2b96e5dd849b2667b54475b42b85d7eeccd6e2a1 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 11 Jul 2021 17:05:42 -0700
Subject: [PATCH 13/36] simplify print

---
 opencl_fdtd/simulation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 4fa4bae..e06825c 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -31,8 +31,8 @@ class Simulation(object):
 
         pmls = [{'axis': a, 'polarity': p} for a in 'xyz' for p in 'np']
         sim = Simulation(grid.grids, do_poynting=True, pmls=pmls)
-        with open('sources.c', 'w') as f:
-            f.write('{}'.format(sim.sources))
+        with open('sources.c', 'wt') as f:
+            f.write(repr(sim.sources))
 
         for t in range(max_t):
             sim.update_E([]).wait()

From bbd33844b38dbe329b2ae09dad9aa3ca8190419b Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 11 Jul 2021 17:05:55 -0700
Subject: [PATCH 14/36] add return type annotation

---
 opencl_fdtd/simulation.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index e06825c..3c2f828 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -86,7 +86,8 @@ class Simulation(object):
                  float_type: numpy.float32 or numpy.float64 = numpy.float32,
                  do_poynting: bool = True,
                  do_poynting_halves: bool = False,
-                 do_fieldsrc: bool = False):
+                 do_fieldsrc: bool = False,
+                 ) -> None:
         """
         Initialize the simulation.
 

From 292fde05d5768d6850e29c30db41fda2c0ce0125 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 11 Jul 2021 17:06:32 -0700
Subject: [PATCH 15/36] convert docstring to cleaner style

---
 opencl_fdtd/simulation.py | 61 +++++++++++++++++++--------------------
 1 file changed, 29 insertions(+), 32 deletions(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 3c2f828..3b5e480 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -23,7 +23,7 @@ jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
 
 
 class Simulation(object):
-    """
+    r"""
     Constructs and holds the basic FDTD operations and related fields
 
     After constructing this object, call the (update_E, update_H, update_S) members
@@ -91,39 +91,36 @@ class Simulation(object):
         """
         Initialize the simulation.
 
-        :param epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray
+        Args:
+            epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray
                 spanning the simulation domain. Relative epsilon is used.
-        :param pmls: List of dicts with keys:
-            'axis': One of 'x', 'y', 'z'.
-            'direction': One of 'n', 'p'.
-            'thickness': Number of layers, default 8.
-            'epsilon_eff': Effective epsilon to match to. Default 1.0.
-            'mu_eff': Effective mu to match to. Default 1.0.
-            'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6.
-            'm': Polynomial grading exponent. Default 3.5.
-            'ma': Exponent for alpha. Default 1.
-        :param bloch_boundaries: List of dicts with keys:
-            'axis': One of 'x', 'y', 'z'.
-            'real': Real part of bloch phase factor (i.e. real(exp(i * phase)))
-            'imag': Imaginary part of bloch phase factor (i.e. imag(exp(i * phase)))
-        :param dt: Time step. Default is min(dxes) * .99/sqrt(3).
-        :param initial_fields: Dict with optional keys ('E', 'H', 'F', 'G') containing initial values for the
-            specified fields (default is 0 everywhere). Fields have same format as epsilon.
-        :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
-        :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
-        :param float_type: numpy.float32 or numpy.float64. Default numpy.float32.
-        :param do_poynting: If true, enables calculation of the poynting vector, S.
+            pmls: List of dicts with keys:
+                'axis': One of 'x', 'y', 'z'.
+                'direction': One of 'n', 'p'.
+                'thickness': Number of layers, default 8.
+                'epsilon_eff': Effective epsilon to match to. Default 1.0.
+                'mu_eff': Effective mu to match to. Default 1.0.
+                'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6.
+                'm': Polynomial grading exponent. Default 3.5.
+                'ma': Exponent for alpha. Default 1.
+            bloch_boundaries: List of dicts with keys:
+                'axis': One of 'x', 'y', 'z'.
+                'real': Real part of bloch phase factor (i.e. real(exp(i * phase)))
+                'imag': Imaginary part of bloch phase factor (i.e. imag(exp(i * phase)))
+            dt: Time step. Default is min(dxes) * .99/sqrt(3).
+            initial_fields: Dict with optional keys ('E', 'H', 'F', 'G') containing initial values for the
+                specified fields (default is 0 everywhere). Fields have same format as epsilon.
+            context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
+            queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
+            float_type: numpy.float32 or numpy.float64. Default numpy.float32.
+            do_poynting: If true, enables calculation of the poynting vector, S.
                 Poynting vector calculation adds the following computational burdens:
-                    ****INACCURATE, TODO FIXME*****
-                    * During update_H, ~6 extra additions/cell are performed in order to spatially
-                        average E and temporally average H. These quantities are multiplied
-                        (6 multiplications/cell) and then stored (6 writes/cell, cache-friendly).
-                    * update_S performs a discrete cross product using the precalculated products
-                        from update_H. This is not nice to the cache and similar to e.g. update_E
-                        in complexity.
-                    * GPU memory requirements are approximately doubled, since S and the intermediate
-                        products must be stored.
-        :param do_poynting_halves: TODO DOCUMENT
+                * During update_H, ~6 extra additions/cell are performed in order to temporally
+                    sum H. The results are then multiplied by E (6 multiplications/cell) and
+                    then stored (6 writes/cell, cache-friendly). The E-field components are
+                    reused from the H-field update and do not require additional H
+                * GPU memory requirements increase by 50% (for storing S)
+            do_poynting_halves: TODO DOCUMENT
         """
         if initial_fields is None:
             initial_fields = {}

From a6384429eefd1f6a4da3ccb7eddfe98cbde44ab5 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 11 Jul 2021 17:06:43 -0700
Subject: [PATCH 16/36] use new email

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 79115a4..3338447 100644
--- a/setup.py
+++ b/setup.py
@@ -14,7 +14,7 @@ setup(name='opencl_fdtd',
       long_description=long_description,
       long_description_content_type='text/markdown',
       author='Jan Petykiewicz',
-      author_email='anewusername@gmail.com',
+      author_email='jan@mpxd.net',
       url='https://mpxd.net/code/jan/opencl_fdtd',
       packages=find_packages(),
       package_data={

From e58efd21276a6e27b81bc5f66eb22039889e6e00 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 11 Jul 2021 17:06:52 -0700
Subject: [PATCH 17/36] depend on meanas

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 3338447..e2802e8 100644
--- a/setup.py
+++ b/setup.py
@@ -24,7 +24,7 @@ setup(name='opencl_fdtd',
             'numpy',
             'pyopencl',
             'jinja2',
-            'fdfd_tools>=0.3',
+            'meanas>=0.3',
       ],
       extras_require={
       },

From 1403b9e1a34a61e104643043aa71acf27131b8bb Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 11 Jul 2021 17:32:14 -0700
Subject: [PATCH 18/36] vec comes from fdmath

---
 opencl_fdtd/simulation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 3b5e480..381047f 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -12,7 +12,7 @@ import pyopencl
 import pyopencl.array
 from pyopencl.elementwise import ElementwiseKernel
 
-from meanas import vec
+from meanas.fdmath import vec
 
 
 __author__ = 'Jan Petykiewicz'

From 4c81f33d05eec69061e0f281b3e46eed0ee4ac90 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 11 Jul 2021 17:35:25 -0700
Subject: [PATCH 19/36] vec/unvec live in meanas.fdmath

---
 fdtd.py                   | 2 +-
 opencl_fdtd/simulation.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fdtd.py b/fdtd.py
index 37f70f9..e718177 100644
--- a/fdtd.py
+++ b/fdtd.py
@@ -228,7 +228,7 @@ def main():
 
     with lzma.open('saved_simulation', 'wb') as f:
         def unvec(f):
-            return meanas.unvec(f, grid.shape)
+            return meanas.fdmath.unvec(f, grid.shape)
         d = {
             'grid': grid,
             'E': unvec(sim.E.get()),
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 381047f..8023d81 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -48,7 +48,7 @@ class Simulation(object):
             event.wait()
 
             with lzma.open('saved_simulation', 'wb') as f:
-                dill.dump(fdfd_tools.unvec(sim.E.get(), grid.shape), f)
+                dill.dump(meanas.fdmath.unvec(sim.E.get(), grid.shape), f)
 
     Code in the form
         event2 = sim.update_H([event0, event1])

From 26ae8578c99f5613d50a48a65c796f9adf5db853 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 31 Oct 2021 19:47:25 -0700
Subject: [PATCH 20/36] update to use gridlock >=1.0

---
 fdtd.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/fdtd.py b/fdtd.py
index e718177..7f520ce 100644
--- a/fdtd.py
+++ b/fdtd.py
@@ -114,14 +114,17 @@ def main():
 #    edge_coords = [numpy.arange(-100.5, 101), numpy.arange(-100.5, 101), numpy.arange(-1, 1)]
 
     # #### Create the grid, mask, and draw the device ####
-    grid = gridlock.Grid(edge_coords, initial=n_air**2, num_grids=3)
-#    grid.draw_slab(surface_normal=gridlock.Direction.z,
+    grid = gridlock.Grid(edge_coords)
+    epsilon = grid.allocate(n_air**2)
+#    grid.draw_slab(epsilon,
+#                   surface_normal=2,
 #                   center=[0, 0, 0],
 #                   thickness=th,
 #                   eps=n_slab**2)
 #    mask = perturbed_l3(a, r)
 #
-#    grid.draw_polygons(surface_normal=gridlock.Direction.z,
+#    grid.draw_polygons(epsilon,
+#                       surface_normal=2,
 #                       center=[0, 0, 0],
 #                       thickness=2 * th,
 #                       eps=n_air**2,
@@ -135,7 +138,7 @@ def main():
             for a in 'xz' for p in 'np']
     #bloch = [{'axis': a, 'real': 1, 'imag': 0} for a in 'x']
     bloch = []
-    sim = Simulation(grid.grids, do_poynting=True, pmls=pmls, bloch_boundaries=bloch)
+    sim = Simulation(epsilon, do_poynting=True, pmls=pmls, bloch_boundaries=bloch)
 
     # Source parameters and function
     w = 2 * numpy.pi * dx / wl
@@ -188,7 +191,7 @@ def main():
 #            e = sim.update_G([e])
         e.wait()
 
-        S = sim.S.get().reshape(grid.grids.shape) * sim.dt * dx * dx *dx
+        S = sim.S.get().reshape(epsilon.shape) * sim.dt * dx * dx *dx
         m = 30
         planes[t] = (
             S[0][+pml_thickness+2, :, pml_thickness+3:-pml_thickness-3].sum(),
@@ -217,10 +220,10 @@ def main():
 #            )
         Ectr[t] = sim.E[ind].get()
         u[t] = pyopencl.array.sum(sim.E * sim.E * sim.eps + h_old * sim.H).get() * dx * dx * dx
-        ui[t] = (sim.E * sim.E * sim.eps + h_old * sim.H).reshape(grid.grids.shape).get()[:, pml_thickness+m:-pml_thickness-m, :,
-                                                                                             pml_thickness+m:-pml_thickness-m].sum() * dx * dx * dx
-#        ui[t] = (sim.E * sim.E * sim.eps + h_old * sim.H).reshape(grid.grids.shape).get()[:, pml_thickness+m:-pml_thickness-m,
-#                                                                                             pml_thickness+m:-pml_thickness-m, :].sum() * dx * dx * dx
+        ui[t] = (sim.E * sim.E * sim.eps + h_old * sim.H).reshape(epsilon.shape).get()[:, pml_thickness+m:-pml_thickness-m, :,
+                                                                                            pml_thickness+m:-pml_thickness-m].sum() * dx * dx * dx
+#        ui[t] = (sim.E * sim.E * sim.eps + h_old * sim.H).reshape(epsilon.shape).get()[:, pml_thickness+m:-pml_thickness-m,
+#                                                                                            pml_thickness+m:-pml_thickness-m, :].sum() * dx * dx * dx
 
         if t % 100 == 0:
             logger.info('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start)))
@@ -231,6 +234,7 @@ def main():
             return meanas.fdmath.unvec(f, grid.shape)
         d = {
             'grid': grid,
+            'epsilon': epsilon,
             'E': unvec(sim.E.get()),
             'H': unvec(sim.H.get()),
             'dt': sim.dt,

From c75a7e67c97a3f1985a63c974edb33912945cebc Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 31 Oct 2021 19:47:48 -0700
Subject: [PATCH 21/36] f-stringify

---
 fdtd.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fdtd.py b/fdtd.py
index 7f520ce..8a704ea 100644
--- a/fdtd.py
+++ b/fdtd.py
@@ -130,7 +130,7 @@ def main():
 #                       eps=n_air**2,
 #                       polygons=mask.as_polygons())
 
-    logger.info('grid shape: {}'.format(grid.shape))
+    logger.info(f'grid shape: {grid.shape}')
     # #### Create the simulation grid ####
 #    pmls = [{'axis': a, 'polarity': p, 'thickness': pml_thickness}
 #            for a in 'xyz' for p in 'np']

From c2e5c94e14894bea22fed40bdfde21d3e012f43e Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 31 Oct 2021 19:48:05 -0700
Subject: [PATCH 22/36] remove unused pcgen.py

---
 pcgen.py | 213 -------------------------------------------------------
 1 file changed, 213 deletions(-)
 delete mode 100644 pcgen.py

diff --git a/pcgen.py b/pcgen.py
deleted file mode 100644
index 9481472..0000000
--- a/pcgen.py
+++ /dev/null
@@ -1,213 +0,0 @@
-"""
-Routines for creating normalized 2D lattices and common photonic crystal
- cavity designs.
-"""
-
-from typing import List
-
-import numpy
-
-
-def triangular_lattice(dims: List[int],
-                       asymmetrical: bool=False
-                       ) -> numpy.ndarray:
-    """
-    Return an ndarray of [[x0, y0], [x1, y1], ...] denoting lattice sites for
-     a triangular lattice in 2D. The lattice will be centered around (0, 0),
-     unless asymmetrical=True in which case there will be extra holes in the +x
-     direction.
-
-    :param dims: Number of lattice sites in the [x, y] directions.
-    :param asymmetrical: If true, each row in x will contain the same number of
-            lattice sites. If false, the structure is symmetrical around (0, 0).
-    :return: [[x0, y0], [x1, 1], ...] denoting lattice sites.
-    """
-    dims = numpy.array(dims, dtype=int)
-
-    if asymmetrical:
-        k = 0
-    else:
-        k = 1
-
-    positions = []
-    ymax = (dims[1] - 1)/2
-    for j in numpy.linspace(-ymax, ymax, dims[0]):
-        j_odd = numpy.floor(j) % 2
-
-        x_offset = j_odd * 0.5
-        y_offset = j * numpy.sqrt(3)/2
-
-        num_x = dims[0] - k * j_odd
-        xmax = (dims[0] - 1)/2
-        xs = numpy.linspace(-xmax, xmax - k * j_odd, num_x) + x_offset
-        ys = numpy.full_like(xs, y_offset)
-
-        positions += [numpy.vstack((xs, ys)).T]
-
-    xy = numpy.vstack(tuple(positions))
-    return xy[xy[:, 0].argsort(), ]
-
-
-def square_lattice(dims: List[int]) -> numpy.ndarray:
-    """
-    Return an ndarray of [[x0, y0], [x1, y1], ...] denoting lattice sites for
-     a square lattice in 2D. The lattice will be centered around (0, 0).
-
-    :param dims: Number of lattice sites in the [x, y] directions.
-    :return: [[x0, y0], [x1, 1], ...] denoting lattice sites.
-    """
-    xs, ys = numpy.meshgrid(range(dims[0]), range(dims[1]), 'xy')
-    xs -= dims[0]/2
-    ys -= dims[1]/2
-    xy = numpy.vstack((xs.flatten(), ys.flatten())).T
-    return xy[xy[:, 0].argsort(), ]
-
-# ### Photonic crystal functions ###
-
-
-def nanobeam_holes(a_defect: float,
-                   num_defect_holes: int,
-                   num_mirror_holes: int
-                   ) -> numpy.ndarray:
-    """
-    Returns a list of [[x0, r0], [x1, r1], ...] of nanobeam hole positions and radii.
-     Creates a region in which the lattice constant and radius are progressively
-     (linearly) altered over num_defect_holes holes until they reach the value
-     specified by a_defect, then symmetrically returned to a lattice constant and
-     radius of 1, which is repeated num_mirror_holes times on each side.
-
-    :param a_defect: Minimum lattice constant for the defect, as a fraction of the
-            mirror lattice constant (ie., for no defect, a_defect = 1).
-    :param num_defect_holes: How many holes form the defect (per-side)
-    :param num_mirror_holes: How many holes form the mirror (per-side)
-    :return: Ndarray [[x0, r0], [x1, r1], ...] of nanobeam hole positions and radii.
-    """
-    a_values = numpy.linspace(a_defect, 1, num_defect_holes, endpoint=False)
-    xs = a_values.cumsum() - (a_values[0] / 2)  # Later mirroring makes center distance 2x as long
-    mirror_xs = numpy.arange(1, num_mirror_holes + 1) + xs[-1]
-    mirror_rs = numpy.ones_like(mirror_xs)
-    return numpy.vstack((numpy.hstack((-mirror_xs[::-1], -xs[::-1], xs, mirror_xs)),
-                         numpy.hstack((mirror_rs[::-1], a_values[::-1], a_values, mirror_rs)))).T
-
-
-def ln_defect(mirror_dims: List[int], defect_length: int) -> numpy.ndarray:
-    """
-    N-hole defect in a triangular lattice.
-
-    :param mirror_dims: [x, y] mirror lengths (number of holes). Total number of holes
-            is 2 * n + 1 in each direction.
-    :param defect_length: Length of defect. Should be an odd number.
-    :return: [[x0, y0], [x1, y1], ...] for all the holes
-    """
-    if defect_length % 2 != 1:
-        raise Exception('defect_length must be odd!')
-    p = triangular_lattice([2 * d + 1 for d in mirror_dims])
-    half_length = numpy.floor(defect_length / 2)
-    hole_nums = numpy.arange(-half_length, half_length + 1)
-    holes_to_keep = numpy.in1d(p[:, 0], hole_nums, invert=True)
-    return p[numpy.logical_or(holes_to_keep, p[:, 1] != 0), ]
-
-
-def ln_shift_defect(mirror_dims: List[int],
-                    defect_length: int,
-                    shifts_a: List[float]=(0.15, 0, 0.075),
-                    shifts_r: List[float]=(1, 1, 1)
-                    ) -> numpy.ndarray:
-    """
-    N-hole defect with shifted holes (intended to give the mode a gaussian profile
-     in real- and k-space so as to improve both Q and confinement). Holes along the
-     defect line are shifted and altered according to the shifts_* parameters.
-
-    :param mirror_dims: [x, y] mirror lengths (number of holes). Total number of holes
-            is 2 * n + 1 in each direction.
-    :param defect_length: Length of defect. Should be an odd number.
-    :param shifts_a: Percentage of a to shift (1st, 2nd, 3rd,...) holes along the defect line
-    :param shifts_r: Factor to multiply the radius by. Should match length of shifts_a
-    :return: [[x0, y0, r0], [x1, y1, r1], ...] for all the holes
-    """
-    if not hasattr(shifts_a, "__len__") and shifts_a is not None:
-        shifts_a = [shifts_a]
-    if not hasattr(shifts_r, "__len__") and shifts_r is not None:
-        shifts_r = [shifts_r]
-
-    xy = ln_defect(mirror_dims, defect_length)
-
-    # Add column for radius
-    xyr = numpy.hstack((xy, numpy.ones((xy.shape[0], 1))))
-
-    # Shift holes
-    # Expand shifts as necessary
-    n_shifted = max(len(shifts_a), len(shifts_r))
-
-    tmp_a = numpy.array(shifts_a)
-    shifts_a = numpy.ones((n_shifted, ))
-    shifts_a[:len(tmp_a)] = tmp_a
-
-    tmp_r = numpy.array(shifts_r)
-    shifts_r = numpy.ones((n_shifted, ))
-    shifts_r[:len(tmp_r)] = tmp_r
-
-    x_removed = numpy.floor(defect_length / 2)
-
-    for ind in range(n_shifted):
-        for sign in (-1, 1):
-            x_val = sign * (x_removed + ind + 1)
-            which = numpy.logical_and(xyr[:, 0] == x_val, xyr[:, 1] == 0)
-            xyr[which, ] = (x_val + numpy.sign(x_val) * shifts_a[ind], 0, shifts_r[ind])
-
-    return xyr
-
-
-def r6_defect(mirror_dims: List[int]) -> numpy.ndarray:
-    """
-    R6 defect in a triangular lattice.
-
-    :param mirror_dims: [x, y] mirror lengths (number of holes). Total number of holes
-            is 2 * n + 1 in each direction.
-    :return: [[x0, y0], [x1, y1], ...] specifying hole centers.
-    """
-    xy = triangular_lattice([2 * d + 1 for d in mirror_dims])
-
-    rem_holes_plus = numpy.array([[1, 0],
-                                  [0.5, +numpy.sqrt(3)/2],
-                                  [0.5, -numpy.sqrt(3)/2]])
-    rem_holes = numpy.vstack((rem_holes_plus, -rem_holes_plus))
-
-    for rem_xy in rem_holes:
-        xy = xy[(xy != rem_xy).any(axis=1), ]
-
-    return xy
-
-
-def l3_shift_perturbed_defect(mirror_dims: List[int],
-                              perturbed_radius: float=1.1,
-                              shifts_a: List[float]=(),
-                              shifts_r: List[float]=()
-                              ) -> numpy.ndarray:
-    """
-    3-hole defect with perturbed hole sizes intended to form an upwards-directed
-     beam. Can also include shifted holes along the defect line, intended
-     to give the mode a more gaussian profile to improve Q.
-
-    :param mirror_dims: [x, y] mirror lengths (number of holes). Total number of holes
-            is 2 * n + 1 in each direction.
-    :param perturbed_radius: Amount to perturb the radius of the holes used for beam-forming
-    :param shifts_a: Percentage of a to shift (1st, 2nd, 3rd,...) holes along the defect line
-    :param shifts_r: Factor to multiply the radius by. Should match length of shifts_a
-    :return: [[x0, y0, r0], [x1, y1, r1], ...] for all the holes
-    """
-    xyr = ln_shift_defect(mirror_dims, 3, shifts_a, shifts_r)
-
-    abs_x, abs_y = (numpy.fabs(xyr[:, i]) for i in (0, 1))
-
-    # Sorted unique xs and ys
-    # Ignore row y=0 because it might have shifted holes
-    xs = numpy.unique(abs_x[abs_x != 0])
-    ys = numpy.unique(abs_y)
-
-    # which holes should be perturbed? (xs[[3, 7]], ys[1]) and (xs[[2, 6]], ys[2])
-    perturbed_holes = ((xs[a], ys[b]) for a, b in ((3, 1), (7, 1), (2, 2), (6, 2)))
-    for row in xyr:
-        if numpy.fabs(row) in perturbed_holes:
-            row[2] = perturbed_radius
-    return xyr

From b2043ce8415244474ef66c0b391b0bbb1c61c280 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 31 Oct 2021 19:48:24 -0700
Subject: [PATCH 23/36] add comment about pml coords

---
 opencl_fdtd/kernels/update_e.cl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl
index 9127181..28cfa80 100644
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@@ -92,7 +92,7 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
     const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
     dH{{v ~ r}} *= p{{r}}2e{{p}}[ir];
     dH{{u ~ r}} *= p{{r}}2e{{p}}[ir];
-    {{psi ~ u}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1e{{p}}[ir] * dH{{v ~ r}};
+    {{psi ~ u}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1e{{p}}[ir] * dH{{v ~ r}}; //note u uses v
     {{psi ~ v}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1e{{p}}[ir] * dH{{u ~ r}};
     pE{{u}}i {{se}}= {{psi ~ u}}[ip];
     pE{{v}}i {{sh}}= {{psi ~ v}}[ip];

From f489eb6e06a64267977a46f6683329281d0ee5dd Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 31 Oct 2021 19:48:45 -0700
Subject: [PATCH 24/36] fix template loading

---
 opencl_fdtd/simulation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 8023d81..096ce7d 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -19,7 +19,7 @@ __author__ = 'Jan Petykiewicz'
 
 
 # Create jinja2 env on module load
-jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
+jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__.split('.')[0], 'kernels'))
 
 
 class Simulation(object):

From 08a93283901b7068bbf7e0b53eee32b2a7aea5f8 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <jan@mpxd.net>
Date: Tue, 22 Nov 2022 14:31:43 -0800
Subject: [PATCH 25/36] move to hatch-based build

---
 opencl_fdtd/LICENSE.md  |  1 +
 opencl_fdtd/README.md   |  1 +
 opencl_fdtd/VERSION.py  |  4 ----
 opencl_fdtd/__init__.py |  3 +--
 pyproject.toml          | 46 +++++++++++++++++++++++++++++++++++++++++
 setup.py                | 32 ----------------------------
 6 files changed, 49 insertions(+), 38 deletions(-)
 create mode 120000 opencl_fdtd/LICENSE.md
 create mode 120000 opencl_fdtd/README.md
 delete mode 100644 opencl_fdtd/VERSION.py
 create mode 100644 pyproject.toml
 delete mode 100644 setup.py

diff --git a/opencl_fdtd/LICENSE.md b/opencl_fdtd/LICENSE.md
new file mode 120000
index 0000000..7eabdb1
--- /dev/null
+++ b/opencl_fdtd/LICENSE.md
@@ -0,0 +1 @@
+../LICENSE.md
\ No newline at end of file
diff --git a/opencl_fdtd/README.md b/opencl_fdtd/README.md
new file mode 120000
index 0000000..32d46ee
--- /dev/null
+++ b/opencl_fdtd/README.md
@@ -0,0 +1 @@
+../README.md
\ No newline at end of file
diff --git a/opencl_fdtd/VERSION.py b/opencl_fdtd/VERSION.py
deleted file mode 100644
index 202c26a..0000000
--- a/opencl_fdtd/VERSION.py
+++ /dev/null
@@ -1,4 +0,0 @@
-""" VERSION defintion. THIS FILE IS MANUALLY PARSED BY setup.py and REQUIRES A SPECIFIC FORMAT """
-__version__ = '''
-0.4
-'''.strip()
diff --git a/opencl_fdtd/__init__.py b/opencl_fdtd/__init__.py
index 9bdf6e0..f6596ad 100644
--- a/opencl_fdtd/__init__.py
+++ b/opencl_fdtd/__init__.py
@@ -1,6 +1,5 @@
 from .simulation import Simulation, type_to_C
 
 __author__ = 'Jan Petykiewicz'
-
-from .VERSION import __version__
+__vesion__ = '0.4'
 version = __version__
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..91160b8
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,46 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "opencl_fdtd"
+description = "OpenCL FDTD solver"
+readme = "README.md"
+license = { file = "LICENSE.md" }
+authors = [
+    { name="Jan Petykiewicz", email="jan@mpxd.net" },
+    ]
+homepage = "https://mpxd.net/code/jan/opencl_fdtd"
+repository = "https://mpxd.net/code/jan/opencl_fdtd"
+keywords = [
+    "FDTD",
+    "finite",
+    "difference",
+    "time",
+    "domain",
+    "simulation",
+    "optics",
+    "electromagnetic",
+    "dielectric",
+    ]
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Manufacturing",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: GNU Affero General Public License v3",
+    "Topic :: Scientific/Engineering",
+    ]
+requires-python = ">=3.8"
+dynamic = ["version"]
+dependencies = [
+    "numpy~=1.21",
+    "pyopencl",
+    "jinja2",
+    "meanas>=0.3",
+    ]
+
+[tool.hatch.version]
+path = "opencl_fdtd/__init__.py"
+
diff --git a/setup.py b/setup.py
deleted file mode 100644
index e2802e8..0000000
--- a/setup.py
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/usr/bin/env python3
-
-from setuptools import setup, find_packages
-
-with open('README.md', 'r') as f:
-    long_description = f.read()
-
-with open('opencl_fdtd/VERSION.py', 'rt') as f:
-    version = f.readlines()[2].strip()
-
-setup(name='opencl_fdtd',
-      version=version,
-      description='OpenCL FDTD solver',
-      long_description=long_description,
-      long_description_content_type='text/markdown',
-      author='Jan Petykiewicz',
-      author_email='jan@mpxd.net',
-      url='https://mpxd.net/code/jan/opencl_fdtd',
-      packages=find_packages(),
-      package_data={
-          'opencl_fdfd': ['kernels/*']
-      },
-      install_requires=[
-            'numpy',
-            'pyopencl',
-            'jinja2',
-            'meanas>=0.3',
-      ],
-      extras_require={
-      },
-      )
-

From 77c10feead1b5eaecd5ee44591e75b482ec4c8b1 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <jan@mpxd.net>
Date: Tue, 22 Nov 2022 14:31:57 -0800
Subject: [PATCH 26/36] gitignore caches

---
 .gitignore | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 5298f42..9ad27c2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,9 +1,11 @@
 .idea/
 *.h5
 
-__pycache__
+__pycache__/
 *.py[cod]
 build/
 dist/
 *.egg-info/
 
+.pytest_cache/
+.mypy_cache/

From 689b3176cc4fc666f2f2b011a0fb9ceb21f4437b Mon Sep 17 00:00:00 2001
From: jan <jan@orange.mpxd.net>
Date: Thu, 24 Nov 2022 16:12:40 -0800
Subject: [PATCH 27/36] style and type fixes

---
 fdtd.py                        |  74 +++++++++--------
 opencl_fdtd/simulation.py      | 140 ++++++++++++++++++---------------
 opencl_fdtd/test_simulation.py |  61 ++++++++------
 3 files changed, 157 insertions(+), 118 deletions(-)

diff --git a/fdtd.py b/fdtd.py
index 8a704ea..6f9638c 100644
--- a/fdtd.py
+++ b/fdtd.py
@@ -30,55 +30,64 @@ def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern:
     """
     Generate a masque.Pattern object containing a perturbed L3 cavity.
 
-    :param a: Lattice constant.
-    :param radius: Hole radius, in units of a (lattice constant).
-    :param kwargs: Keyword arguments:
-        hole_dose, trench_dose, hole_layer, trench_layer: Shape properties for Pattern.
-                Defaults *_dose=1, hole_layer=0, trench_layer=1.
-        shifts_a, shifts_r: passed to pcgen.l3_shift; specifies lattice constant (1 -
-                multiplicative factor) and radius (multiplicative factor) for shifting
-                holes adjacent to the defect (same row). Defaults are 0.15 shift for
-                first hole, 0.075 shift for third hole, and no radius change.
+    Args:
+        a: Lattice constant.
+        radius: Hole radius, in units of a (lattice constant).
+        hole_dose: Dose for all holes. Default 1.
+        trench_dose: Dose for undercut trenches. Default 1.
+        hole_layer: Layer for holes. Default (0, 0).
+        trench_layer: Layer for undercut trenches. Default (1, 0).
+        shifts_a: passed to pcgen.l3_shift
+        shifts_r: passed to pcgen.l3_shift
         xy_size: [x, y] number of mirror periods in each direction; total size is
-                2 * n + 1 holes in each direction. Default [10, 10].
+                2 * n + 1 holes in each direction. Default (10, 10).
         perturbed_radius: radius of holes perturbed to form an upwards-driected beam
                 (multiplicative factor). Default 1.1.
         trench width: Width of the undercut trenches. Default 1.2e3.
-    :return: masque.Pattern object containing the L3 design
+
+    Returns:
+        `masque.Pattern` object containing the L3 design
     """
 
-    default_args = {'hole_dose':    1,
-                    'trench_dose':  1,
-                    'hole_layer':   0,
-                    'trench_layer': 1,
-                    'shifts_a':     (0.15, 0, 0.075),
-                    'shifts_r':     (1.0, 1.0, 1.0),
-                    'xy_size':      (10, 10),
-                    'perturbed_radius': 1.1,
-                    'trench_width': 1.2e3,
-                    }
+    default_args = {
+        'hole_dose':    1,
+        'trench_dose':  1,
+        'hole_layer':   0,
+        'trench_layer': 1,
+        'shifts_a':     (0.15, 0, 0.075),
+        'shifts_r':     (1.0, 1.0, 1.0),
+        'xy_size':      (10, 10),
+        'perturbed_radius': 1.1,
+        'trench_width': 1.2e3,
+        }
     kwargs = {**default_args, **kwargs}
 
-    xyr = pcgen.l3_shift_perturbed_defect(mirror_dims=kwargs['xy_size'],
-                                          perturbed_radius=kwargs['perturbed_radius'],
-                                          shifts_a=kwargs['shifts_a'],
-                                          shifts_r=kwargs['shifts_r'])
+    xyr = pcgen.l3_shift_perturbed_defect(
+        mirror_dims=kwargs['xy_size'],
+        perturbed_radius=kwargs['perturbed_radius'],
+        shifts_a=kwargs['shifts_a'],
+        shifts_r=kwargs['shifts_r'],
+        )
     xyr *= a
     xyr[:, 2] *= radius
 
     pat = Pattern()
-    pat.name = 'L3p-a{:g}r{:g}rp{:g}'.format(a, radius, kwargs['perturbed_radius'])
+    pat.name = f'L3p-a{a:g}r{radius:g}rp{kwargs["perturbed_radius"]:g}'
     pat.shapes += [shapes.Circle(radius=r, offset=(x, y),
                                  dose=kwargs['hole_dose'],
                                  layer=kwargs['hole_layer'])
                    for x, y, r in xyr]
 
     maxes = numpy.max(numpy.fabs(xyr), axis=0)
-    pat.shapes += [shapes.Polygon.rectangle(
-        lx=(2 * maxes[0]), ly=kwargs['trench_width'],
-        offset=(0, s * (maxes[1] + a + kwargs['trench_width'] / 2)),
-        dose=kwargs['trench_dose'], layer=kwargs['trench_layer'])
-                   for s in (-1, 1)]
+    pat.shapes += [
+        shapes.Polygon.rectangle(
+            lx=(2 * maxes[0]),
+            ly=kwargs['trench_width'],
+            offset=(0, s * (maxes[1] + a + kwargs['trench_width'] / 2)),
+            dose=kwargs['trench_dose'],
+            layer=kwargs['trench_layer'],
+            )
+        for s in (-1, 1)]
     return pat
 
 
@@ -226,7 +235,8 @@ def main():
 #                                                                                            pml_thickness+m:-pml_thickness-m, :].sum() * dx * dx * dx
 
         if t % 100 == 0:
-            logger.info('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start)))
+            avg = (t + 1) / (time.perf_counter() - start)
+            logger.info(f'iteration {t}: average {avg} iterations per sec')
             sys.stdout.flush()
 
     with lzma.open('saved_simulation', 'wb') as f:
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 096ce7d..f2ef309 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -2,9 +2,10 @@
 Class for constructing and holding the basic FDTD operations and fields
 """
 
-from typing import List, Dict, Callable
+from typing import List, Dict, Callable, Type, Union, Optional, Sequence
 from collections import OrderedDict
 import numpy
+from numpy.typing import NDArray
 import jinja2
 import warnings
 
@@ -31,8 +32,8 @@ class Simulation(object):
 
         pmls = [{'axis': a, 'polarity': p} for a in 'xyz' for p in 'np']
         sim = Simulation(grid.grids, do_poynting=True, pmls=pmls)
-        with open('sources.c', 'wt') as f:
-            f.write(repr(sim.sources))
+        with open('sources.c', 'w') as f:
+            f.write(f'{sim.sources}')
 
         for t in range(max_t):
             sim.update_E([]).wait()
@@ -56,38 +57,38 @@ class Simulation(object):
      event0 and event1 to occur (i.e. previous operations to finish) before starting execution.
      event2 can then be used to prepare further operations to be run after update_H.
     """
-    E = None            # type: pyopencl.array.Array
-    H = None            # type: pyopencl.array.Array
-    S = None            # type: pyopencl.array.Array
-    eps = None          # type: pyopencl.array.Array
-    dt = None           # type: float
-    inv_dxes = None     # type: List[pyopencl.array.Array]
+    E: pyopencl.array.Array
+    H: pyopencl.array.Array
+    S: pyopencl.array.Array
+    eps: pyopencl.array.Array
+    dt: float
+    inv_dxes: List[pyopencl.array.Array]
 
-    arg_type = None     # type: numpy.float32 or numpy.float64
+    arg_type: Type
 
-    context = None      # type: pyopencl.Context
-    queue = None        # type: pyopencl.CommandQueue
+    context: pyopencl.Context
+    queue: pyopencl.CommandQueue
 
-    update_E = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
-    update_H = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
-    update_S = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
-    update_J = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
-    sources = None      # type: Dict[str, str]
+    update_E: Callable[[List[pyopencl.Event]], pyopencl.Event]
+    update_H: Callable[[List[pyopencl.Event]], pyopencl.Event]
+    update_S: Callable[[List[pyopencl.Event]], pyopencl.Event]
+    update_J: Callable[[List[pyopencl.Event]], pyopencl.Event]
+    sources: Dict[str, str]
 
-    def __init__(self,
-                 epsilon: List[numpy.ndarray],
-                 pmls: List[Dict[str, int or float]],
-                 bloch_boundaries: List[Dict[str, int or float]] = (),
-                 dxes: List[List[numpy.ndarray]] or float = None,
-                 dt: float = None,
-                 initial_fields: Dict[str, List[numpy.ndarray]] = None,
-                 context: pyopencl.Context = None,
-                 queue: pyopencl.CommandQueue = None,
-                 float_type: numpy.float32 or numpy.float64 = numpy.float32,
-                 do_poynting: bool = True,
-                 do_poynting_halves: bool = False,
-                 do_fieldsrc: bool = False,
-                 ) -> None:
+    def __init__(
+            self,
+            epsilon: NDArray,
+            pmls: Sequence[Dict[str, float]],
+            bloch_boundaries: Sequence[Dict[str, float]] = (),
+            dxes: Union[List[List[NDArray]], float, None] = None,
+            dt: Optional[float] = None,
+            initial_fields: Optional[Dict[str, NDArray]] = None,
+            context: Optional[pyopencl.Context] = None,
+            queue: Optional[pyopencl.CommandQueue] = None,
+            float_type: Type = numpy.float32,
+            do_poynting: bool = True,
+            do_fieldsrc: bool = False,
+            ) -> None:
         """
         Initialize the simulation.
 
@@ -113,14 +114,13 @@ class Simulation(object):
             context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
             queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
             float_type: numpy.float32 or numpy.float64. Default numpy.float32.
-            do_poynting: If true, enables calculation of the poynting vector, S.
+            do_poynting: If True, enables calculation of the poynting vector, S.
                 Poynting vector calculation adds the following computational burdens:
-                * During update_H, ~6 extra additions/cell are performed in order to temporally
-                    sum H. The results are then multiplied by E (6 multiplications/cell) and
+                * During update_H, 12 extra additions/cell are performed in order to temporally
+                    sum E and H. The results are then multiplied by E (6 multiplications/cell) and
                     then stored (6 writes/cell, cache-friendly). The E-field components are
                     reused from the H-field update and do not require additional H
                 * GPU memory requirements increase by 50% (for storing S)
-            do_poynting_halves: TODO DOCUMENT
         """
         if initial_fields is None:
             initial_fields = {}
@@ -147,9 +147,9 @@ class Simulation(object):
         if dt is None:
             self.dt = max_dt
         elif dt > max_dt:
-            warnings.warn('Warning: unstable dt: {}'.format(dt))
+            warnings.warn(f'Warning: unstable dt: {dt}')
         elif dt <= 0:
-            raise Exception('Invalid dt: {}'.format(dt))
+            raise Exception(f'Invalid dt: {dt}')
         else:
             self.dt = dt
 
@@ -216,10 +216,12 @@ class Simulation(object):
         if bloch_boundaries:
             bloch_args = jinja_args.copy()
             bloch_args['do_poynting'] = False
-            bloch_args['bloch'] = [{'axis': b['axis'],
-                                    'real': b['imag'],
-                                    'imag': b['real']}
-                                   for b in bloch_boundaries]
+            bloch_args['bloch'] = [
+                {'axis': b['axis'],
+                 'real': b['imag'],
+                 'imag': b['real'],
+                }
+                for b in bloch_boundaries]
             F_source = jinja_env.get_template('update_e.cl').render(**bloch_args)
             G_source = jinja_env.get_template('update_h.cl').render(**bloch_args)
             self.sources['F'] = F_source
@@ -316,15 +318,22 @@ class Simulation(object):
                 pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
         return pml_e_fields, pml_h_fields
 
-    def _create_operation(self, source, args_fields):
+    def _create_operation(self, source, args_fields) -> Callable[..., pyopencl.Event]:
         args = OrderedDict()
-        [args.update(d) for d in args_fields]
-        update = ElementwiseKernel(self.context, operation=source,
-                                   arguments=', '.join(args.keys()))
+        for d in args_fields:
+            args.update(d)
+        update = ElementwiseKernel(
+            self.context,
+            operation=source,
+            arguments=', '.join(args.keys()),
+            )
         return lambda e: update(*args.values(), wait_for=e)
 
-    def _create_context(self, context: pyopencl.Context = None,
-                        queue: pyopencl.CommandQueue = None):
+    def _create_context(
+            self,
+            context: Optional[pyopencl.Context] = None,
+            queue: Optional[pyopencl.CommandQueue] = None,
+            ) -> None:
         if context is None:
             self.context = pyopencl.create_some_context()
         else:
@@ -335,16 +344,16 @@ class Simulation(object):
         else:
             self.queue = queue
 
-    def _create_eps(self, epsilon: List[numpy.ndarray]):
+    def _create_eps(self, epsilon: NDArray) -> pyopencl.array.Array:
         if len(epsilon) != 3:
             raise Exception('Epsilon must be a list with length of 3')
         if not all((e.shape == epsilon[0].shape for e in epsilon[1:])):
             raise Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
         if not epsilon[0].shape == self.shape:
-            raise Exception('Epsilon shape mismatch. Expected {}, got {}'.format(self.shape, epsilon[0].shape))
+            raise Exception(f'Epsilon shape mismatch. Expected {self.shape}, got {epsilon[0].shape}')
         self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(self.arg_type))
 
-    def _create_field(self, initial_value: List[numpy.ndarray] = None):
+    def _create_field(self, initial_value: Optional[NDArray] = None) -> pyopencl.array.Array:
         if initial_value is None:
             return pyopencl.array.zeros_like(self.eps)
         else:
@@ -355,23 +364,30 @@ class Simulation(object):
             return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type))
 
 
-def type_to_C(float_type: numpy.dtype) -> str:
+def type_to_C(
+        float_type: Type,
+        ) -> str:
     """
     Returns a string corresponding to the C equivalent of a numpy type.
     Only works for float16, float32, float64.
 
-    :param float_type: e.g. numpy.float32
-    :return: string containing the corresponding C type (eg. 'double')
+    Args:
+        float_type: e.g. numpy.float32
+
+    Returns:
+        string containing the corresponding C type (eg. 'double')
     """
-    if float_type == numpy.float16:
-        arg_type = 'half'
-    elif float_type == numpy.float32:
-        arg_type = 'float'
-    elif float_type == numpy.float64:
-        arg_type = 'double'
-    else:
-        raise Exception('Unsupported type')
-    return arg_type
+    types = {
+        numpy.float16: 'half',
+        numpy.float32: 'float',
+        numpy.float64: 'double',
+        numpy.complex64: 'cfloat_t',
+        numpy.complex128: 'cdouble_t',
+    }
+    if float_type not in types:
+        raise Exception(f'Unsupported type: {float_type}')
+
+    return types[float_type]
 
 #            def par(x):
 #                scaling = ((x / (pml['thickness'])) ** pml['m'])
diff --git a/opencl_fdtd/test_simulation.py b/opencl_fdtd/test_simulation.py
index f2fafa2..c68a2cf 100644
--- a/opencl_fdtd/test_simulation.py
+++ b/opencl_fdtd/test_simulation.py
@@ -2,7 +2,7 @@ import unittest
 import numpy
 
 from opencl_fdtd import Simulation
-from fdfd_tools import fdtd
+from meanas import fdtd
 
 
 class BasicTests():
@@ -25,16 +25,18 @@ class BasicTests():
         dxes = self.dxes if self.dxes is not None else tuple(tuple(numpy.ones(s) for s in e0.shape[1:]) for _ in range(2))
         dV = numpy.prod(numpy.meshgrid(*dxes[0], indexing='ij'), axis=0)
         u0 = self.j_mag * self.j_mag / self.epsilon[self.src_mask] * dV[mask]
-        args = {'dxes': self.dxes,
-                'epsilon': self.epsilon}
+        args = {
+            'dxes': self.dxes,
+            'epsilon': self.epsilon,
+            }
 
         # Make sure initial energy and E dot J are correct
         energy0 = fdtd.energy_estep(h0=h0, e1=e0, h2=self.hs[1], **args)
         e_dot_j_0 = fdtd.delta_energy_j(j0=(e0 - 0) * self.epsilon, e1=e0, dxes=self.dxes)
         self.assertTrue(numpy.allclose(energy0[mask], u0))
-        self.assertFalse(energy0[~mask].any(), msg='energy0: {}'.format(energy0))
+        self.assertFalse(energy0[~mask].any(), msg=f'{energy0=}')
         self.assertTrue(numpy.allclose(e_dot_j_0[mask], u0))
-        self.assertFalse(e_dot_j_0[~mask].any(), msg='e_dot_j_0: {}'.format(e_dot_j_0))
+        self.assertFalse(e_dot_j_0[~mask].any(), msg=f'{e_dot_j_0=}')
 
 
     def test_energy_conservation(self):
@@ -47,22 +49,25 @@ class BasicTests():
             with self.subTest(i=ii):
                 u_hstep = fdtd.energy_hstep(e0=self.es[ii-1], h1=self.hs[ii], e2=self.es[ii], **args)
                 u_estep = fdtd.energy_estep(h0=self.hs[ii], e1=self.es[ii], h2=self.hs[ii + 1], **args)
-                self.assertTrue(numpy.allclose(u_hstep.sum(), u0), msg='u_hstep: {}\n{}'.format(u_hstep.sum(), numpy.rollaxis(u_hstep, -1)))
-                self.assertTrue(numpy.allclose(u_estep.sum(), u0), msg='u_estep: {}\n{}'.format(u_estep.sum(), numpy.rollaxis(u_estep, -1)))
+                self.assertTrue(numpy.allclose(u_hstep.sum(), u0), msg=f'u_hstep: {u_hstep.sum()}\n{numpy.moveaxis(u_hstep, -1, 0)}')
+                self.assertTrue(numpy.allclose(u_estep.sum(), u0), msg=f'u_estep: {u_estep.sum()}\n{numpy.moveaxis(u_estep, -1, 0)}')
 
 
     def test_poynting(self):
         for ii in range(1, 3):
             with self.subTest(i=ii):
                 s = fdtd.poynting(e=self.es[ii], h=self.hs[ii+1] + self.hs[ii])
+                sf = numpy.moveaxis(s, -1, 0)
+                ss = numpy.moveaxis(self.ss[ii], -1, 0)
                 self.assertTrue(numpy.allclose(s, self.ss[ii], rtol=1e-4),
-                                msg='From ExH:\n{}\nFrom sim.S:\n{}'.format(numpy.rollaxis(s, -1),
-                                                                            numpy.rollaxis(self.ss[ii], -1)))
+                                msg=f'From ExH:\n{sf}\nFrom sim.S:\n{ss}')
 
 
     def test_poynting_divergence(self):
-        args = {'dxes': self.dxes,
-                'epsilon': self.epsilon}
+        args = {
+            'dxes': self.dxes,
+            'epsilon': self.epsilon,
+            }
 
         dxes = self.dxes if self.dxes is not None else tuple(tuple(numpy.ones(s) for s in self.epsilon.shape[1:]) for _ in range(2))
         dV = numpy.prod(numpy.meshgrid(*dxes[0], indexing='ij'), axis=0)
@@ -75,9 +80,11 @@ class BasicTests():
 
                 du_half_h2e = u_estep - u_hstep
                 div_s_h2e = self.dt * fdtd.poynting_divergence(e=self.es[ii], h=self.hs[ii], dxes=self.dxes) * dV
+
+                du_half_h2e_f = numpy.moveaxis(du_half_h2e, -1, 0)
+                div_s_h2e_f = -numpy.moveaxis(div_s_h2e, -1, 0)
                 self.assertTrue(numpy.allclose(du_half_h2e, -div_s_h2e, rtol=1e-4),
-                                msg='du_half_h2e\n{}\ndiv_s_h2e\n{}'.format(numpy.rollaxis(du_half_h2e, -1),
-                                                                           -numpy.rollaxis(div_s_h2e, -1)))
+                                msg=f'du_half_h2e\n{du_half_h2e_f}\ndiv_s_h2e\n{div_s_h2e_f}')
 
                 if u_eprev is None:
                     u_eprev = u_estep
@@ -86,15 +93,18 @@ class BasicTests():
                 # previous half-step
                 du_half_e2h = u_hstep - u_eprev
                 div_s_e2h = self.dt * fdtd.poynting_divergence(e=self.es[ii-1], h=self.hs[ii], dxes=self.dxes) * dV
+                du_half_e2h_f = numpy.moveaxis(du_half_e2h, -1, 0)
+                div_s_e2h_f = -numpy.moveaxis(div_s_e2h, -1, 0)
                 self.assertTrue(numpy.allclose(du_half_e2h, -div_s_e2h, rtol=1e-4),
-                                msg='du_half_e2h\n{}\ndiv_s_e2h\n{}'.format(numpy.rollaxis(du_half_e2h, -1),
-                                                                           -numpy.rollaxis(div_s_e2h, -1)))
+                                msg=f'du_half_e2h\n{du_half_e2h_f}\ndiv_s_e2h\n{div_s_e2h_f}')
                 u_eprev = u_estep
 
 
     def test_poynting_planes(self):
-        args = {'dxes': self.dxes,
-                'epsilon': self.epsilon}
+        args = {
+            'dxes': self.dxes,
+            'epsilon': self.epsilon,
+            }
         dxes = self.dxes if self.dxes is not None else tuple(tuple(numpy.ones(s) for s in self.epsilon.shape[1:]) for _ in range(2))
         dV = numpy.prod(numpy.meshgrid(*dxes[0], indexing='ij'), axis=0)
 
@@ -118,8 +128,9 @@ class BasicTests():
                 planes = [s_h2e[px].sum(), -s_h2e[mx].sum(),
                           s_h2e[py].sum(), -s_h2e[my].sum(),
                           s_h2e[pz].sum(), -s_h2e[mz].sum()]
+                du = (u_estep - u_hstep)[self.src_mask[1]]
                 self.assertTrue(numpy.allclose(sum(planes), (u_estep - u_hstep)[self.src_mask[1]]),
-                                msg='planes: {} (sum: {})\n du:\n {}'.format(planes, sum(planes), (u_estep - u_hstep)[self.src_mask[1]]))
+                                msg=f'planes: {planes} (sum: {sum(planes)})\n du:\n {du}')
 
                 if u_eprev is None:
                     u_eprev = u_estep
@@ -132,15 +143,14 @@ class BasicTests():
                 planes = [s_e2h[px].sum(), -s_e2h[mx].sum(),
                           s_e2h[py].sum(), -s_e2h[my].sum(),
                           s_e2h[pz].sum(), -s_e2h[mz].sum()]
+                du = (u_hstep - u_eprev)[self.src_mask[1]]
                 self.assertTrue(numpy.allclose(sum(planes), (u_hstep - u_eprev)[self.src_mask[1]]),
-                                msg='planes: {} (sum: {})\n du:\n {}'.format(planes, sum(planes), (u_hstep - u_eprev)[self.src_mask[1]]))
+                                msg=f'planes: {du} (sum: {sum(planes)})\n du:\n {du}')
 
                 # previous half-step
                 u_eprev = u_estep
 
 
-
-
 class Basic2DNoDXOnlyVacuum(unittest.TestCase, BasicTests):
     def setUp(self):
         shape = [3, 5, 5, 1]
@@ -348,8 +358,10 @@ class JdotE_3DUniformDX(unittest.TestCase):
         e1 = self.es[2]
         j0 = numpy.zeros_like(e0)
         j0[self.src_mask] = self.j_mag
-        args = {'dxes': self.dxes,
-                'epsilon': self.epsilon}
+        args = {
+            'dxes': self.dxes,
+            'epsilon': self.epsilon,
+            }
         e2h = fdtd.maxwell_h(dt=self.dt, dxes=self.dxes)
 
         #ee = j0 * (2 * e0 - j0)
@@ -365,4 +377,5 @@ class JdotE_3DUniformDX(unittest.TestCase):
         u_hstep = fdtd.energy_hstep(e0=self.es[0], h1=self.hs[1], e2=self.es[1], **args)
         u_estep = fdtd.energy_estep(h0=self.hs[-2], e1=self.es[-2], h2=self.hs[-1], **args)
         #breakpoint()
-        self.assertTrue(numpy.allclose(u0.sum(), (u_estep - u_hstep).sum()), msg='{} != {}'.format(u0.sum(), (u_estep - u_hstep).sum()))
+        du = (u_estep - u_hstep).sum()
+        self.assertTrue(numpy.allclose(u0.sum(), (u_estep - u_hstep).sum()), msg=f'{u0.sum()} != {du}')

From 654f9c165b20454130aa2b3605abf356f0405308 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <jan@mpxd.net>
Date: Wed, 10 Jan 2024 20:52:52 -0800
Subject: [PATCH 28/36] formatting

---
 fdtd.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fdtd.py b/fdtd.py
index 6f9638c..e311c0b 100644
--- a/fdtd.py
+++ b/fdtd.py
@@ -230,9 +230,9 @@ def main():
         Ectr[t] = sim.E[ind].get()
         u[t] = pyopencl.array.sum(sim.E * sim.E * sim.eps + h_old * sim.H).get() * dx * dx * dx
         ui[t] = (sim.E * sim.E * sim.eps + h_old * sim.H).reshape(epsilon.shape).get()[:, pml_thickness+m:-pml_thickness-m, :,
-                                                                                            pml_thickness+m:-pml_thickness-m].sum() * dx * dx * dx
+                                                                                          pml_thickness+m:-pml_thickness-m].sum() * dx * dx * dx
 #        ui[t] = (sim.E * sim.E * sim.eps + h_old * sim.H).reshape(epsilon.shape).get()[:, pml_thickness+m:-pml_thickness-m,
-#                                                                                            pml_thickness+m:-pml_thickness-m, :].sum() * dx * dx * dx
+#                                                                                          pml_thickness+m:-pml_thickness-m, :].sum() * dx * dx * dx
 
         if t % 100 == 0:
             avg = (t + 1) / (time.perf_counter() - start)

From d0011cb1f97a395e1619d1f27c34d930c90b0b47 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <jan@mpxd.net>
Date: Wed, 10 Jan 2024 20:54:12 -0800
Subject: [PATCH 29/36] type modernization

---
 opencl_fdtd/simulation.py | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index f2ef309..31e017a 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -2,7 +2,7 @@
 Class for constructing and holding the basic FDTD operations and fields
 """
 
-from typing import List, Dict, Callable, Type, Union, Optional, Sequence
+from typing import Callable, Type, Sequence
 from collections import OrderedDict
 import numpy
 from numpy.typing import NDArray
@@ -62,29 +62,29 @@ class Simulation(object):
     S: pyopencl.array.Array
     eps: pyopencl.array.Array
     dt: float
-    inv_dxes: List[pyopencl.array.Array]
+    inv_dxes: list[pyopencl.array.Array]
 
     arg_type: Type
 
     context: pyopencl.Context
     queue: pyopencl.CommandQueue
 
-    update_E: Callable[[List[pyopencl.Event]], pyopencl.Event]
-    update_H: Callable[[List[pyopencl.Event]], pyopencl.Event]
-    update_S: Callable[[List[pyopencl.Event]], pyopencl.Event]
-    update_J: Callable[[List[pyopencl.Event]], pyopencl.Event]
-    sources: Dict[str, str]
+    update_E: Callable[[list[pyopencl.Event]], pyopencl.Event]
+    update_H: Callable[[list[pyopencl.Event]], pyopencl.Event]
+    update_S: Callable[[list[pyopencl.Event]], pyopencl.Event]
+    update_J: Callable[[list[pyopencl.Event]], pyopencl.Event]
+    sources: dict[str, str]
 
     def __init__(
             self,
             epsilon: NDArray,
-            pmls: Sequence[Dict[str, float]],
-            bloch_boundaries: Sequence[Dict[str, float]] = (),
-            dxes: Union[List[List[NDArray]], float, None] = None,
-            dt: Optional[float] = None,
-            initial_fields: Optional[Dict[str, NDArray]] = None,
-            context: Optional[pyopencl.Context] = None,
-            queue: Optional[pyopencl.CommandQueue] = None,
+            pmls: Sequence[dict[str, float]],
+            bloch_boundaries: Sequence[dict[str, float]] = (),
+            dxes: list[list[NDArray]] | float | None = None,
+            dt: float | None = None,
+            initial_fields: dict[str, NDArray] | None = None,
+            context: pyopencl.Context | None = None,
+            queue: pyopencl.CommandQueue | None = None,
             float_type: Type = numpy.float32,
             do_poynting: bool = True,
             do_fieldsrc: bool = False,
@@ -331,8 +331,8 @@ class Simulation(object):
 
     def _create_context(
             self,
-            context: Optional[pyopencl.Context] = None,
-            queue: Optional[pyopencl.CommandQueue] = None,
+            context: pyopencl.Context | None = None,
+            queue: pyopencl.CommandQueue | None = None,
             ) -> None:
         if context is None:
             self.context = pyopencl.create_some_context()
@@ -353,7 +353,7 @@ class Simulation(object):
             raise Exception(f'Epsilon shape mismatch. Expected {self.shape}, got {epsilon[0].shape}')
         self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(self.arg_type))
 
-    def _create_field(self, initial_value: Optional[NDArray] = None) -> pyopencl.array.Array:
+    def _create_field(self, initial_value: NDArray | None = None) -> pyopencl.array.Array:
         if initial_value is None:
             return pyopencl.array.zeros_like(self.eps)
         else:

From 5b1e758c27de93430e7d468a1b928f6fa974cb2c Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <jan@mpxd.net>
Date: Wed, 10 Jan 2024 20:57:23 -0800
Subject: [PATCH 30/36] typo fix

---
 opencl_fdtd/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/opencl_fdtd/__init__.py b/opencl_fdtd/__init__.py
index f6596ad..0dcc446 100644
--- a/opencl_fdtd/__init__.py
+++ b/opencl_fdtd/__init__.py
@@ -1,5 +1,5 @@
 from .simulation import Simulation, type_to_C
 
 __author__ = 'Jan Petykiewicz'
-__vesion__ = '0.4'
+__version__ = '0.4'
 version = __version__

From b703f1ee20576dcc899df67824586662f0e4a29e Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <jan@mpxd.net>
Date: Wed, 10 Jan 2024 20:57:40 -0800
Subject: [PATCH 31/36] drop object superclass

---
 opencl_fdtd/simulation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 31e017a..fbd282a 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -23,7 +23,7 @@ __author__ = 'Jan Petykiewicz'
 jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__.split('.')[0], 'kernels'))
 
 
-class Simulation(object):
+class Simulation:
     r"""
     Constructs and holds the basic FDTD operations and related fields
 

From 50b30d31fb77c9250e7b81da59a6f630fc7ca1ae Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <jan@mpxd.net>
Date: Tue, 30 Jul 2024 23:18:54 -0700
Subject: [PATCH 32/36] fixes driven by ruff & mypy

---
 opencl_fdtd/__init__.py   |   5 +-
 opencl_fdtd/simulation.py | 142 ++++++++++++++++++++------------------
 2 files changed, 79 insertions(+), 68 deletions(-)

diff --git a/opencl_fdtd/__init__.py b/opencl_fdtd/__init__.py
index 0dcc446..5bb721e 100644
--- a/opencl_fdtd/__init__.py
+++ b/opencl_fdtd/__init__.py
@@ -1,4 +1,7 @@
-from .simulation import Simulation, type_to_C
+from .simulation import (
+    Simulation as Simulation,
+    type_to_C as type_to_C,
+    )
 
 __author__ = 'Jan Petykiewicz'
 __version__ = '0.4'
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index fbd282a..99ba3ad 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -2,12 +2,13 @@
 Class for constructing and holding the basic FDTD operations and fields
 """
 
-from typing import Callable, Type, Sequence
-from collections import OrderedDict
+from collections.abc import Callable, Sequence
 import numpy
 from numpy.typing import NDArray
+from numpy import floating, complexfloating
 import jinja2
 import warnings
+import logging
 
 import pyopencl
 import pyopencl.array
@@ -16,13 +17,18 @@ from pyopencl.elementwise import ElementwiseKernel
 from meanas.fdmath import vec
 
 
-__author__ = 'Jan Petykiewicz'
+logger = logging.getLogger(__name__)
 
 
 # Create jinja2 env on module load
 jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__.split('.')[0], 'kernels'))
 
 
+class FDTDError(Exception):
+    """ Custom exception for opencl_fdtd """
+    pass
+
+
 class Simulation:
     r"""
     Constructs and holds the basic FDTD operations and related fields
@@ -64,7 +70,7 @@ class Simulation:
     dt: float
     inv_dxes: list[pyopencl.array.Array]
 
-    arg_type: Type
+    arg_type: type
 
     context: pyopencl.Context
     queue: pyopencl.CommandQueue
@@ -85,7 +91,7 @@ class Simulation:
             initial_fields: dict[str, NDArray] | None = None,
             context: pyopencl.Context | None = None,
             queue: pyopencl.CommandQueue | None = None,
-            float_type: Type = numpy.float32,
+            float_type: type = numpy.float32,
             do_poynting: bool = True,
             do_fieldsrc: bool = False,
             ) -> None:
@@ -134,7 +140,7 @@ class Simulation:
         if dxes is None:
             dxes = 1.0
 
-        if isinstance(dxes, (float, int)):
+        if isinstance(dxes, float | int):
             uniform_dx = dxes
             min_dx = dxes
         else:
@@ -143,13 +149,14 @@ class Simulation:
             min_dx = min(min(dxn) for dxn in dxes[0] + dxes[1])
 
         max_dt = min_dx * .99 / numpy.sqrt(3)
+        logger.info(f'{min_dx=}, {max_dt=}, {dt=}')
 
         if dt is None:
             self.dt = max_dt
         elif dt > max_dt:
-            warnings.warn(f'Warning: unstable dt: {dt}')
+            warnings.warn(f'Warning: unstable dt: {dt}', stacklevel=2)
         elif dt <= 0:
-            raise Exception(f'Invalid dt: {dt}')
+            raise FDTDError(f'Invalid dt: {dt}')
         else:
             self.dt = dt
 
@@ -173,28 +180,31 @@ class Simulation:
         def ptr(arg: str) -> str:
             return ctype + ' *' + arg
 
-        base_fields = OrderedDict()
-        base_fields[ptr('E')] = self.E
-        base_fields[ptr('H')] = self.H
-        base_fields[ctype + ' dt'] = self.dt
+        base_fields = {
+            ptr('E'): self.E,
+            ptr('H'): self.H,
+            ctype + ' dt': self.dt,
+            }
         if uniform_dx is False:
             inv_dx_names = ['inv_d' + eh + r for eh in 'eh' for r in 'xyz']
-            for name, field in zip(inv_dx_names, self.inv_dxes):
+            for name, field in zip(inv_dx_names, self.inv_dxes, strict=True):
                 base_fields[ptr(name)] = field
 
-        eps_field = OrderedDict()
-        eps_field[ptr('eps')] = self.eps
+        eps_field = {ptr('eps'): self.eps}
 
         if bloch_boundaries:
-            base_fields[ptr('F')] = self.F
-            base_fields[ptr('G')] = self.G
+            base_fields |= {
+                ptr('F'): self.F,
+                ptr('G'): self.G,
+                }
 
-            bloch_fields = OrderedDict()
-            bloch_fields[ptr('E')] = self.F
-            bloch_fields[ptr('H')] = self.G
-            bloch_fields[ctype + ' dt'] = self.dt
-            bloch_fields[ptr('F')] = self.E
-            bloch_fields[ptr('G')] = self.H
+            bloch_fields = {
+                ptr('E'): self.F,
+                ptr('H'): self.G,
+                ctype + ' dt': self.dt,
+                ptr('F'): self.E,
+                ptr('G'): self.H,
+                }
 
         common_source = jinja_env.get_template('common.cl').render(
             ftype=ctype,
@@ -216,18 +226,18 @@ class Simulation:
         if bloch_boundaries:
             bloch_args = jinja_args.copy()
             bloch_args['do_poynting'] = False
-            bloch_args['bloch'] = [
-                {'axis': b['axis'],
-                 'real': b['imag'],
-                 'imag': b['real'],
-                }
+            bloch_args['bloch'] = [{
+                    'axis': b['axis'],
+                    'real': b['imag'],
+                    'imag': b['real'],
+                    }
                 for b in bloch_boundaries]
             F_source = jinja_env.get_template('update_e.cl').render(**bloch_args)
             G_source = jinja_env.get_template('update_h.cl').render(**bloch_args)
             self.sources['F'] = F_source
             self.sources['G'] = G_source
 
-        S_fields = OrderedDict()
+        S_fields = {}
         if do_poynting:
             self.S = pyopencl.array.zeros_like(self.E)
             S_fields[ptr('S')] = self.S
@@ -237,7 +247,7 @@ class Simulation:
             S_fields[ptr('S0')] = self.S0
             S_fields[ptr('S1')] = self.S1
 
-        J_fields = OrderedDict()
+        J_fields = {}
         if do_fieldsrc:
             J_source = jinja_env.get_template('update_j.cl').render(**jinja_args)
             self.sources['J'] = J_source
@@ -247,37 +257,36 @@ class Simulation:
             J_fields[ptr('Jr')] = self.Jr
             J_fields[ptr('Ji')] = self.Ji
 
-        '''
-        PML
-        '''
+        #
+        # PML
+        #
         pml_e_fields, pml_h_fields = self._create_pmls(pmls)
         if bloch_boundaries:
             pml_f_fields, pml_g_fields = self._create_pmls(pmls)
 
-        '''
-        Create operations
-        '''
+        #
+        # Create operations
+        #
         self.update_E = self._create_operation(E_source, (base_fields, eps_field, pml_e_fields))
         self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields))
         if bloch_boundaries:
             self.update_F = self._create_operation(F_source, (bloch_fields, eps_field, pml_f_fields))
             self.update_G = self._create_operation(G_source, (bloch_fields, pml_g_fields))
         if do_fieldsrc:
-            args = OrderedDict()
-            [args.update(d) for d in (base_fields, J_fields)]
+            args = base_fields | J_fields
             var_args = [ctype + ' ' + v for v in 'cs'] + ['uint ' + r + m for r in 'xyz' for m in ('min', 'max')]
             update = ElementwiseKernel(self.context, operation=J_source,
                                        arguments=', '.join(list(args.keys()) + var_args))
             self.update_J = lambda e, *a: update(*args.values(), *a, wait_for=e)
 
-    def _create_pmls(self, pmls):
+    def _create_pmls(self, pmls: Sequence[dict[str, float]]) -> tuple[dict[str, pyopencl.array.Array], ...]:
         ctype = type_to_C(self.arg_type)
 
         def ptr(arg: str) -> str:
             return ctype + ' *' + arg
 
-        pml_e_fields = OrderedDict()
-        pml_h_fields = OrderedDict()
+        pml_e_fields = {}
+        pml_h_fields = {}
         for pml in pmls:
             a = 'xyz'.find(pml['axis'])
 
@@ -285,7 +294,9 @@ class Simulation:
             kappa_max = numpy.sqrt(pml['mu_eff'] * pml['epsilon_eff'])
             alpha_max = pml['cfs_alpha']
 
-            def par(x):
+            print(sigma_max, kappa_max, alpha_max, pml['thickness'], self.dt)
+
+            def par(x, pml=pml, sigma_max=sigma_max, kappa_max=kappa_max, alpha_max=alpha_max):     # noqa: ANN001, ANN202
                 scaling = (x / pml['thickness']) ** pml['m']
                 sigma = scaling * sigma_max
                 kappa = 1 + scaling * (kappa_max - 1)
@@ -301,8 +312,13 @@ class Simulation:
             elif pml['polarity'] == 'n':
                 xh -= 0.5
 
+            logger.debug(f'{pml=}')
+            logger.debug(f'{xe=}')
+            logger.debug(f'{xh=}')
+            logger.debug(f'{par(xe)=}')
+            logger.debug(f'{par(xh)=}')
             pml_p_names = [['p' + pml['axis'] + i + eh + pml['polarity'] for i in '012'] for eh in 'eh']
-            for name_e, name_h, pe, ph in zip(pml_p_names[0], pml_p_names[1], par(xe), par(xh)):
+            for name_e, name_h, pe, ph in zip(pml_p_names[0], pml_p_names[1], par(xe), par(xh), strict=True):
                 pml_e_fields[ptr(name_e)] = pyopencl.array.to_device(self.queue, pe)
                 pml_h_fields[ptr(name_h)] = pyopencl.array.to_device(self.queue, ph)
 
@@ -313,13 +329,13 @@ class Simulation:
             psi_shape = list(self.shape)
             psi_shape[a] = pml['thickness']
 
-            for ne, nh in zip(*psi_names):
+            for ne, nh in zip(*psi_names, strict=True):
                 pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
                 pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
         return pml_e_fields, pml_h_fields
 
-    def _create_operation(self, source, args_fields) -> Callable[..., pyopencl.Event]:
-        args = OrderedDict()
+    def _create_operation(self, source: str, args_fields: Sequence[dict[str, pyopencl.array.Array]]) -> Callable[..., pyopencl.Event]:
+        args = {}
         for d in args_fields:
             args.update(d)
         update = ElementwiseKernel(
@@ -334,38 +350,30 @@ class Simulation:
             context: pyopencl.Context | None = None,
             queue: pyopencl.CommandQueue | None = None,
             ) -> None:
-        if context is None:
-            self.context = pyopencl.create_some_context()
-        else:
-            self.context = context
-
-        if queue is None:
-            self.queue = pyopencl.CommandQueue(self.context)
-        else:
-            self.queue = queue
+        self.context = context or pyopencl.create_some_context()
+        self.queue = queue or pyopencl.CommandQueue(self.context)
 
     def _create_eps(self, epsilon: NDArray) -> pyopencl.array.Array:
         if len(epsilon) != 3:
-            raise Exception('Epsilon must be a list with length of 3')
-        if not all((e.shape == epsilon[0].shape for e in epsilon[1:])):
-            raise Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
+            raise FDTDError('Epsilon must be a list with length of 3')
+        if not all(e.shape == epsilon[0].shape for e in epsilon[1:]):
+            raise FDTDError('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
         if not epsilon[0].shape == self.shape:
-            raise Exception(f'Epsilon shape mismatch. Expected {self.shape}, got {epsilon[0].shape}')
+            raise FDTDError(f'Epsilon shape mismatch. Expected {self.shape}, got {epsilon[0].shape}')
         self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(self.arg_type))
 
     def _create_field(self, initial_value: NDArray | None = None) -> pyopencl.array.Array:
         if initial_value is None:
             return pyopencl.array.zeros_like(self.eps)
-        else:
-            if len(initial_value) != 3:
-                Exception('Initial field value must be a list of length 3')
-            if not all((f.shape == self.shape for f in initial_value)):
-                Exception('Initial field list elements must have same shape as epsilon elements')
-            return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type))
+        if len(initial_value) != 3:
+            raise FDTDError('Initial field value must be a list of length 3')
+        if not all(f.shape == self.shape for f in initial_value):
+            raise FDTDError('Initial field list elements must have same shape as epsilon elements')
+        return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type))
 
 
 def type_to_C(
-        float_type: Type,
+        float_type: type,
         ) -> str:
     """
     Returns a string corresponding to the C equivalent of a numpy type.
@@ -385,7 +393,7 @@ def type_to_C(
         numpy.complex128: 'cdouble_t',
     }
     if float_type not in types:
-        raise Exception(f'Unsupported type: {float_type}')
+        raise FDTDError(f'Unsupported type: {float_type}')
 
     return types[float_type]
 

From 7f620a9bb3f52c17f1c9169bf9857cbaf6194591 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <jan@mpxd.net>
Date: Tue, 30 Jul 2024 23:19:07 -0700
Subject: [PATCH 33/36] modern rng approach

---
 opencl_fdtd/test_simulation.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/opencl_fdtd/test_simulation.py b/opencl_fdtd/test_simulation.py
index c68a2cf..9c8649a 100644
--- a/opencl_fdtd/test_simulation.py
+++ b/opencl_fdtd/test_simulation.py
@@ -323,8 +323,9 @@ class JdotE_3DUniformDX(unittest.TestCase):
         self.epsilon = numpy.full(shape, 4, dtype=float)
         self.epsilon[self.src_mask] = 2
 
-        e = numpy.random.randint(-128, 128 + 1, size=shape).astype(float)
-        h = numpy.random.randint(-128, 128 + 1, size=shape).astype(float)
+        rng = numpy.random.default_rng()
+        e = rng.integers(-128, 128 + 1, size=shape, dtype=float)
+        h = rng.integers(-128, 128 + 1, size=shape, dtype=float)
         self.es = [e]
         self.hs = [h]
         self.ss = []

From fe684d83d38079ddbae18f32c6946fbd55f595a5 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <jan@mpxd.net>
Date: Tue, 30 Jul 2024 23:19:21 -0700
Subject: [PATCH 34/36] add ruff and mypy configs

---
 pyproject.toml | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 91160b8..2a2aaca 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,3 +44,50 @@ dependencies = [
 [tool.hatch.version]
 path = "opencl_fdtd/__init__.py"
 
+
+[tool.ruff]
+exclude = [
+    ".git",
+    "dist",
+    ]
+line-length = 145
+indent-width = 4
+lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
+lint.select = [
+    "NPY", "E", "F", "W", "B", "ANN", "UP", "SLOT", "SIM", "LOG",
+    "C4", "ISC", "PIE", "PT", "RET", "TCH", "PTH", "INT",
+    "ARG", "PL", "R", "TRY",
+    "G010", "G101", "G201", "G202",
+    "Q002", "Q003", "Q004",
+    ]
+lint.ignore = [
+    #"ANN001",   # No annotation
+    "ANN002",   # *args
+    "ANN003",   # **kwargs
+    "ANN401",   # Any
+    "ANN101",   # self: Self
+    "SIM108",   # single-line if / else assignment
+    "RET504",   # x=y+z; return x
+    "PIE790",   # unnecessary pass
+    "ISC003",   # non-implicit string concatenation
+    "C408",     # dict(x=y) instead of {'x': y}
+    "PLR09",    # Too many xxx
+    "PLR2004",  # magic number
+    "PLC0414",  # import x as x
+    "TRY003",   # Long exception message
+    ]
+
+
+[[tool.mypy.overrides]]
+module = [
+    "scipy",
+    "scipy.optimize",
+    "scipy.linalg",
+    "scipy.sparse",
+    "scipy.sparse.linalg",
+    "pyopencl",
+    "pyopencl.array",
+    "pyopencl.elementwise",
+    "pyopencl.reduction",
+    ]
+ignore_missing_imports = true

From 5b16295769d1b27f4cb267f511d561bee303a4b7 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <jan@mpxd.net>
Date: Tue, 30 Jul 2024 23:19:31 -0700
Subject: [PATCH 35/36] update deps

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 2a2aaca..e1a534a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,10 +32,10 @@ classifiers = [
     "License :: OSI Approved :: GNU Affero General Public License v3",
     "Topic :: Scientific/Engineering",
     ]
-requires-python = ">=3.8"
+requires-python = ">=3.11"
 dynamic = ["version"]
 dependencies = [
-    "numpy~=1.21",
+    "numpy>=1.26",
     "pyopencl",
     "jinja2",
     "meanas>=0.3",

From 2f37d97c3ef21fc7a121c9a672779f6280152d32 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <jan@mpxd.net>
Date: Fri, 31 Jan 2025 20:17:15 -0800
Subject: [PATCH 36/36] autodetect uniform dxes, and fix nonuniform dxes setup

---
 opencl_fdtd/simulation.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 99ba3ad..2c337ec 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -143,9 +143,12 @@ class Simulation:
         if isinstance(dxes, float | int):
             uniform_dx = dxes
             min_dx = dxes
+        elif all((dxes[0][0][0] == dx).all() for dx in dxes[0] + dxes[1]):
+            uniform_dx = dxes[0][0][0]
+            min_dx = dxes[0][0][0]
         else:
             uniform_dx = False
-            self.inv_dxes = [self._create_field(1 / dxn) for dxn in dxes[0] + dxes[1]]
+            self.inv_dxes = self._create_inv_dxes(dxes)
             min_dx = min(min(dxn) for dxn in dxes[0] + dxes[1])
 
         max_dt = min_dx * .99 / numpy.sqrt(3)
@@ -371,6 +374,17 @@ class Simulation:
             raise FDTDError('Initial field list elements must have same shape as epsilon elements')
         return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type))
 
+    def _create_inv_dxes(self, dxes: list[list[NDArray]]) -> pyopencl.array.Array:
+        if len(dxes[0]) or len(dxes[1]) != 3:
+            raise FDTDError('dxes must be two lists of 3 1D ndarrays each')
+        if self.shape != tuple(len(dx) for dx in dxes[0]):
+            raise FDTDError('dxes must be compatible with shape of epsilon')
+        if self.shape != tuple(len(dx) for dx in dxes[1]):
+            raise FDTDError('dxes must be compatible with shape of epsilon')
+        self.inv_dxes = [
+            pyopencl.array.to_device(self.queue, (1 / dxn).astype(self.arg_type))
+            for dxn in chain(dxes[0], dxes[1])]
+
 
 def type_to_C(
         float_type: type,