From 727105f8fe114f62233820203b5c6eadbf1638ef Mon Sep 17 00:00:00 2001
From: jan <jan@orange.mpxd.net>
Date: Thu, 1 Sep 2016 14:39:44 -0700
Subject: [PATCH 01/33] use arrays for pml params

---
 fdtd.py            |  6 +++++-
 fdtd/boundary.py   | 24 ++++++------------------
 fdtd/simulation.py | 13 +++++++++----
 3 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/fdtd.py b/fdtd.py
index 0ce560e..63d2337 100644
--- a/fdtd.py
+++ b/fdtd.py
@@ -156,8 +156,12 @@ def main():
         print('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start)))
         sys.stdout.flush()
 
+        if t % 20 == 0:
+            r = sum([(f * f * e).get().sum() for f, e in zip(sim.E, sim.eps)])
+            print('E sum', r)
+
         # Save field slices
-        if (t % 20 == 0 and (max_t - t < 1000 or t < 1000)) or t == max_t-1:
+        if (t % 20 == 0 and (max_t - t <= 1000 or t <= 2000)) or t == max_t-1:
             print('saving E-field')
             for j, f in enumerate(sim.E):
                 a = f.get()
diff --git a/fdtd/boundary.py b/fdtd/boundary.py
index bb8dbdf..6b50f9f 100644
--- a/fdtd/boundary.py
+++ b/fdtd/boundary.py
@@ -118,18 +118,12 @@ def cpml(direction: int,
         p0 = numpy.exp(-(sigma + alpha) * dt)
         p1 = sigma / (sigma + alpha) * (p0 - 1)
         return p0, p1
-    p0e, p1e = par(xe)
-    p0h, p1h = par(xh)
 
     vals = {'r': r,
             'u': uv[0],
             'v': uv[1],
             'np':   np,
             'th':   thickness,
-            'p0e': ', '.join((str(x) for x in p0e)),
-            'p1e': ', '.join((str(x) for x in p1e)),
-            'p0h': ', '.join((str(x) for x in p0h)),
-            'p1h': ', '.join((str(x) for x in p1h)),
             'se': '-+'[direction % 2],
             'sh': '+-'[direction % 2]}
 
@@ -149,24 +143,16 @@ if ( (s{r} - 1) > {r} && {r} > (s{r} - 1) - ({th} + 1) ) {{
         raise Exception('Bad polarity (=0)')
 
     code_e = """
-    // pml parameters:
-    const float p0[{th}] = {{ {p0e} }};
-    const float p1[{th}] = {{ {p1e} }};
-
-    Psi_{r}{np}_E{u}[ip] = p0[ir] * Psi_{r}{np}_E{u}[ip] + p1[ir] * (H{v}[i] - H{v}[i-di{r}]);
-    Psi_{r}{np}_E{v}[ip] = p0[ir] * Psi_{r}{np}_E{v}[ip] + p1[ir] * (H{u}[i] - H{u}[i-di{r}]);
+    Psi_{r}{np}_E{u}[ip] = p0e[ir] * Psi_{r}{np}_E{u}[ip] + p1e[ir] * (H{v}[i] - H{v}[i-di{r}]);
+    Psi_{r}{np}_E{v}[ip] = p0e[ir] * Psi_{r}{np}_E{v}[ip] + p1e[ir] * (H{u}[i] - H{u}[i-di{r}]);
 
     E{u}[i] {se}= dt / eps{u}[i] * Psi_{r}{np}_E{u}[ip];
     E{v}[i] {sh}= dt / eps{v}[i] * Psi_{r}{np}_E{v}[ip];
 }}
 """
     code_h = """
-    // pml parameters:
-    const float p0[{th}] = {{ {p0h} }};
-    const float p1[{th}] = {{ {p1h} }};
-
-    Psi_{r}{np}_H{u}[ip] = p0[ir] * Psi_{r}{np}_H{u}[ip] + p1[ir] * (E{v}[i+di{r}] - E{v}[i]);
-    Psi_{r}{np}_H{v}[ip] = p0[ir] * Psi_{r}{np}_H{v}[ip] + p1[ir] * (E{u}[i+di{r}] - E{u}[i]);
+    Psi_{r}{np}_H{u}[ip] = p0h[ir] * Psi_{r}{np}_H{u}[ip] + p1h[ir] * (E{v}[i+di{r}] - E{v}[i]);
+    Psi_{r}{np}_H{v}[ip] = p0h[ir] * Psi_{r}{np}_H{v}[ip] + p1h[ir] * (E{u}[i+di{r}] - E{u}[i]);
 
     H{u}[i] {sh}= dt * Psi_{r}{np}_H{u}[ip];
     H{v}[i] {se}= dt * Psi_{r}{np}_H{v}[ip];
@@ -180,6 +166,8 @@ if ( (s{r} - 1) > {r} && {r} > (s{r} - 1) - ({th} + 1) ) {{
                   'Psi_{r}{np}_E{v}'.format(**vals)],
         'psi_H': ['Psi_{r}{np}_H{u}'.format(**vals),
                   'Psi_{r}{np}_H{v}'.format(**vals)],
+        'pe': par(xe),
+        'ph': par(xh),
     }
 
     return pml_data
diff --git a/fdtd/simulation.py b/fdtd/simulation.py
index 227dcb0..6c63273 100644
--- a/fdtd/simulation.py
+++ b/fdtd/simulation.py
@@ -171,14 +171,19 @@ class Simulation(object):
         dt_arg = [ctype + ' dt']
         arglist_E = [ctype + ' *' + psi for psi in psi_E_names]
         arglist_H = [ctype + ' *' + psi for psi in psi_H_names]
-        pml_E_args = ', '.join(E_args + H_args + dt_arg + eps_args + arglist_E)
-        pml_H_args = ', '.join(E_args + H_args + dt_arg + arglist_H)
+        pe_args = [ctype + ' *' + s for s in ('p0e', 'p1e')]
+        ph_args = [ctype + ' *' + s for s in ('p0h', 'p1h')]
+        pml_E_args = ', '.join(E_args + H_args + dt_arg + eps_args + arglist_E + pe_args)
+        pml_H_args = ', '.join(E_args + H_args + dt_arg + arglist_H + ph_args)
 
         pml_E = ElementwiseKernel(self.context, arguments=pml_E_args, operation=pml_E_source)
         pml_H = ElementwiseKernel(self.context, arguments=pml_H_args, operation=pml_H_source)
 
-        self.cpml_E = lambda e: pml_E(*self.E, *self.H, self.dt, *self.eps, *psi_E, wait_for=e)
-        self.cpml_H = lambda e: pml_H(*self.E, *self.H, self.dt, *psi_H, wait_for=e)
+        pe = [pyopencl.array.to_device(self.queue, p) for p in pml_data['pe']]
+        ph = [pyopencl.array.to_device(self.queue, p) for p in pml_data['ph']]
+
+        self.cpml_E = lambda e: pml_E(*self.E, *self.H, self.dt, *self.eps, *psi_E, *pe, wait_for=e)
+        self.cpml_H = lambda e: pml_H(*self.E, *self.H, self.dt, *psi_H, *ph, wait_for=e)
         self.cmpl_psi_E = {k: v for k, v in zip(psi_E_names, psi_E)}
         self.cmpl_psi_H = {k: v for k, v in zip(psi_H_names, psi_H)}
 

From a6e601b6484406983b4b6b3e36250b9eb3fbe334 Mon Sep 17 00:00:00 2001
From: jan <janp@stanford.edu>
Date: Sat, 16 Jul 2016 17:44:51 -0700
Subject: [PATCH 02/33] Clean up comments

---
 fdtd.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fdtd.py b/fdtd.py
index 0ce560e..e586ee3 100644
--- a/fdtd.py
+++ b/fdtd.py
@@ -96,7 +96,8 @@ def main():
     z_max = 1.6 * a
     xyz_max = numpy.hstack((xy_max, z_max)) + pml_thickness * dx
 
-    # Coordinates of the edges of the cells. The fdtd package can only do square grids at the moment.
+    # Coordinates of the edges of the cells.
+    #  The fdtd package can only do square grids at the moment.
     half_edge_coords = [numpy.arange(dx/2, m + dx, step=dx) for m in xyz_max]
     edge_coords = [numpy.hstack((-h[::-1], h)) for h in half_edge_coords]
 
@@ -139,8 +140,8 @@ def main():
         return numpy.sin(w * t0) * numpy.exp(-alpha * t0**2)
 
     # #### Run a bunch of iterations ####
-    # event = sim.whatever([prev_event]) indicates that sim.whatever should be queued immediately and run
-    #  once prev_event is finished.
+    # event = sim.whatever([prev_event]) indicates that sim.whatever should be queued
+    #  immediately and run once prev_event is finished.
     output_file = h5py.File('simulation_output.h5', 'w')
     start = time.perf_counter()
     for t in range(max_t):

From 6c3313d7c985d87ad45f9eed145af4f91646ed19 Mon Sep 17 00:00:00 2001
From: jan <janp@stanford.edu>
Date: Sat, 16 Jul 2016 18:07:54 -0700
Subject: [PATCH 03/33] call overhead still way too big

---
 fdtd.py      | 9 ++++++---
 fdtd/base.py | 3 ++-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/fdtd.py b/fdtd.py
index e586ee3..8e30fd0 100644
--- a/fdtd.py
+++ b/fdtd.py
@@ -75,7 +75,7 @@ def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern:
 def main():
     max_t = 8000            # number of timesteps
 
-    dx = 40                 # discretization (nm/cell)
+    dx = 15                 # discretization (nm/cell)
     pml_thickness = 8       # (number of cells)
 
     wl = 1550               # Excitation wavelength and fwhm
@@ -109,13 +109,16 @@ def main():
                    eps=n_slab**2)
     mask = perturbed_l3(a, r)
 
-    grid.draw_polygons(surface_normal=gridlock.Direction.z,
+    for i in range(5000):
+        print(i)
+        grid.draw_polygons(surface_normal=gridlock.Direction.z,
                        center=[0, 0, 0],
                        thickness=2 * th,
                        eps=n_air**2,
                        polygons=mask.as_polygons())
+    return
 
-    print(grid.shape)
+    print(grid.shape, numpy.prod(grid.shape))
     # #### Create the simulation grid ####
     sim = Simulation(grid.grids)
 
diff --git a/fdtd/base.py b/fdtd/base.py
index 6285fb9..3717a65 100644
--- a/fdtd/base.py
+++ b/fdtd/base.py
@@ -30,7 +30,8 @@ const int diy = sz;
 const int diz = 1;
 """
 
-# Given a linear index i and shape sx, sy, sz, defines x, y, and z as the 3D indices of the current element (i).
+# Given a linear index i and shape sx, sy, sz, defines x, y, and z
+#  as the 3D indices of the current element (i).
 xyz_source = """
 // Convert linear index to field index (xyz)
 const int x = i / (sz * sy);

From d34c478f1d7122028f9540737de47cbc08b2d3cc Mon Sep 17 00:00:00 2001
From: jan <jan@orange.mpxd.net>
Date: Thu, 1 Sep 2016 14:39:44 -0700
Subject: [PATCH 04/33] Rewrite, with the following features: - Move to jinja2
 templates for the opencl code - Combine PML code into the E, H updates for
 speed - Add Poynting vector calculation code, including precalculation during
 H update - Use arrays for PML parameters (p0, p1) - Switch to linearized,
 C-ordered fields (~50% performance boost??)

- Added jinja2 and fdfd_tools dependencies
---
 fdtd.py                  |  68 ++++++-----
 fdtd/base.py             |  72 -----------
 fdtd/boundary.py         | 185 ----------------------------
 fdtd/kernels/common.cl   |  84 +++++++++++++
 fdtd/kernels/update_e.cl |  78 ++++++++++++
 fdtd/kernels/update_h.cl | 125 +++++++++++++++++++
 fdtd/kernels/update_s.cl |  36 ++++++
 fdtd/simulation.py       | 258 ++++++++++++++++++++++-----------------
 requirements.txt         |   2 +
 9 files changed, 506 insertions(+), 402 deletions(-)
 delete mode 100644 fdtd/base.py
 delete mode 100644 fdtd/boundary.py
 create mode 100644 fdtd/kernels/common.cl
 create mode 100644 fdtd/kernels/update_e.cl
 create mode 100644 fdtd/kernels/update_h.cl
 create mode 100644 fdtd/kernels/update_s.cl

diff --git a/fdtd.py b/fdtd.py
index 0ce560e..ecd16b0 100644
--- a/fdtd.py
+++ b/fdtd.py
@@ -8,12 +8,17 @@ import sys
 import time
 
 import numpy
-import h5py
+import lzma 
+import dill
 
 from fdtd.simulation import Simulation
 from masque import Pattern, shapes
 import gridlock
 import pcgen
+import fdfd_tools
+
+
+__author__ = 'Jan Petykiewicz'
 
 
 def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern:
@@ -75,7 +80,7 @@ def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern:
 def main():
     max_t = 8000            # number of timesteps
 
-    dx = 40                 # discretization (nm/cell)
+    dx = 25                 # discretization (nm/cell)
     pml_thickness = 8       # (number of cells)
 
     wl = 1550               # Excitation wavelength and fwhm
@@ -114,19 +119,9 @@ def main():
                        eps=n_air**2,
                        polygons=mask.as_polygons())
 
-    print(grid.shape)
+    print('grid shape: {}'.format(grid.shape))
     # #### Create the simulation grid ####
-    sim = Simulation(grid.grids)
-
-    # Conducting boundaries and pmls in every direction
-    c_args = []
-    pml_args = []
-    for d in (0, 1, 2):
-        for p in (-1, 1):
-            c_args += [{'direction': d, 'polarity': p}]
-            pml_args += [{'direction': d, 'polarity': p, 'epsilon_eff': n_slab**2}]
-    sim.init_conductors(c_args)
-    sim.init_cpml(pml_args)
+    sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8)
 
     # Source parameters and function
     w = 2 * numpy.pi * dx / wl
@@ -137,31 +132,44 @@ def main():
     def field_source(i):
         t0 = i * sim.dt - delay
         return numpy.sin(w * t0) * numpy.exp(-alpha * t0**2)
+   
+    with open('sources.c', 'w') as f:
+        f.write(sim.sources['E'])
+        f.write('\n==========================================\n')
+        f.write(sim.sources['H'])
+        if sim.update_S:
+            f.write('\n==========================================\n')
+            f.write(sim.sources['S'])
 
     # #### Run a bunch of iterations ####
     # event = sim.whatever([prev_event]) indicates that sim.whatever should be queued immediately and run
     #  once prev_event is finished.
-    output_file = h5py.File('simulation_output.h5', 'w')
     start = time.perf_counter()
     for t in range(max_t):
-        event = sim.cpml_E([])
-        sim.update_E([event]).wait()
+        sim.update_E([]).wait()
 
-        sim.E[1][tuple(grid.shape//2)] += field_source(t)
-        event = sim.conductor_E([])
-        event = sim.cpml_H([event])
-        event = sim.update_H([event])
-        sim.conductor_H([event]).wait()
+        ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + numpy.prod(grid.shape)
+        sim.E[ind] += field_source(t)
+        e = sim.update_H([])
+        if sim.update_S:
+            e = sim.update_S([e])
+        e.wait()
 
-        print('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start)))
-        sys.stdout.flush()
+        if t % 100 == 0:
+            print('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start)))
+            sys.stdout.flush()
 
-        # Save field slices
-        if (t % 20 == 0 and (max_t - t < 1000 or t < 1000)) or t == max_t-1:
-            print('saving E-field')
-            for j, f in enumerate(sim.E):
-                a = f.get()
-                output_file['/E{}_t{}'.format('xyz'[j], t)] = a[:, :, round(a.shape[2]/2)]
+    with lzma.open('saved_simulation', 'wb') as f:
+        def unvec(f):
+            return fdfd_tools.unvec(f, grid.shape)
+        d = {
+            'grid': grid,
+            'E': unvec(sim.E.get()),
+            'H': unvec(sim.H.get()),
+            }
+        if sim.S is not None:
+            d['S'] = unvec(sim.S.get())
+        dill.dump(d, f)
 
 if __name__ == '__main__':
     main()
diff --git a/fdtd/base.py b/fdtd/base.py
deleted file mode 100644
index 6285fb9..0000000
--- a/fdtd/base.py
+++ /dev/null
@@ -1,72 +0,0 @@
-"""
-Basic code snippets for opencl FDTD
-"""
-
-from typing import List
-import numpy
-
-
-def shape_source(shape: List[int] or numpy.ndarray) -> str:
-    """
-    Defines sx, sy, sz C constants specifying the shape of the grid in each of the 3 dimensions.
-
-    :param shape: [sx, sy, sz] values.
-    :return: String containing C source.
-    """
-    sxyz = """
-// Field sizes
-const int sx = {shape[0]};
-const int sy = {shape[1]};
-const int sz = {shape[2]};
-""".format(shape=shape)
-    return sxyz
-
-# Defines dix, diy, diz constants used for stepping in the x, y, z directions in a linear array
-#  (ie, given Ex[i] referring to position (x, y, z), Ex[i+diy] will refer to position (x, y+1, z))
-dixyz_source = """
-// Convert offset in field xyz to linear index offset
-const int dix = sz * sy;
-const int diy = sz;
-const int diz = 1;
-"""
-
-# Given a linear index i and shape sx, sy, sz, defines x, y, and z as the 3D indices of the current element (i).
-xyz_source = """
-// Convert linear index to field index (xyz)
-const int x = i / (sz * sy);
-const int y = (i - x * sz * sy) / sz;
-const int z = (i - y * sz - x * sz * sy);
-"""
-
-# Source code for updating the E field; maxes use of dixyz_source.
-maxwell_E_source = """
-// E update equations
-Ex[i] += dt / epsx[i] * ((Hz[i] - Hz[i-diy]) - (Hy[i] - Hy[i-diz]));
-Ey[i] += dt / epsy[i] * ((Hx[i] - Hx[i-diz]) - (Hz[i] - Hz[i-dix]));
-Ez[i] += dt / epsz[i] * ((Hy[i] - Hy[i-dix]) - (Hx[i] - Hx[i-diy]));
-"""
-
-# Source code for updating the H field; maxes use of dixyz_source and assumes mu=0
-maxwell_H_source = """
-// H update equations
-Hx[i] -= dt * ((Ez[i+diy] - Ez[i]) - (Ey[i+diz] - Ey[i]));
-Hy[i] -= dt * ((Ex[i+diz] - Ex[i]) - (Ez[i+dix] - Ez[i]));
-Hz[i] -= dt * ((Ey[i+dix] - Ey[i]) - (Ex[i+diy] - Ex[i]));
-"""
-
-
-def type_to_C(float_type: numpy.float32 or numpy.float64) -> str:
-    """
-    Returns a string corresponding to the C equivalent of a numpy type.
-    Only works for float32 and float64.
-
-    :param float_type: numpy.float32 or numpy.float64
-    :return: string containing the corresponding C type (eg. 'double')
-    """
-    if float_type == numpy.float32:
-        arg_type = 'float'
-    elif float_type == numpy.float64:
-        arg_type = 'double'
-    else:
-        raise Exception('Unsupported type')
-    return arg_type
diff --git a/fdtd/boundary.py b/fdtd/boundary.py
deleted file mode 100644
index bb8dbdf..0000000
--- a/fdtd/boundary.py
+++ /dev/null
@@ -1,185 +0,0 @@
-from typing import List, Dict
-import numpy
-
-
-def conductor(direction: int,
-              polarity: int,
-              ) -> List[str]:
-    """
-    Create source code for conducting boundary conditions.
-
-    :param direction: integer in range(3), corresponding to x,y,z.
-    :param polarity: -1 or 1, specifying eg. a -x or +x boundary.
-    :return: [E_source, H_source] source code for E and H boundary update steps.
-    """
-    if direction not in range(3):
-        raise Exception('Invalid direction: {}'.format(direction))
-
-    if polarity not in (-1, 1):
-        raise Exception('Invalid polarity: {}'.format(polarity))
-
-    r = 'xyz'[direction]
-    uv = 'xyz'.replace(r, '')
-
-    if polarity < 0:
-        bc_e = """
-if ({r} == 0) {{
-    E{r}[i] = 0;
-    E{u}[i] = E{u}[i+di{r}];
-    E{v}[i] = E{v}[i+di{r}];
-}}
-"""
-        bc_h = """
-if ({r} == 0) {{
-    H{r}[i] = H{r}[i+di{r}];
-    H{u}[i] = 0;
-    H{v}[i] = 0;
-}}
-"""
-
-    elif polarity > 0:
-        bc_e = """
-if ({r} == s{r} - 1) {{
-    E{r}[i] = -E{r}[i-2*di{r}];
-    E{u}[i] = +E{u}[i-di{r}];
-    E{v}[i] = +E{v}[i-di{r}];
-}} else if ({r} == s{r} - 2) {{
-    E{r}[i] = 0;
-}}
-"""
-        bc_h = """
-if ({r} == s{r} - 1) {{
-    H{r}[i] = +H{r}[i-di{r}];
-    H{u}[i] = -H{u}[i-2*di{r}];
-    H{v}[i] = -H{v}[i-2*di{r}];
-}} else if ({r} == s{r} - 2) {{
-    H{u}[i] = 0;
-    H{v}[i] = 0;
-}}
-"""
-    else:
-        raise Exception()
-
-    replacements = {'r': r, 'u': uv[0], 'v': uv[1]}
-    return [s.format(**replacements) for s in (bc_e, bc_h)]
-
-
-def cpml(direction: int,
-         polarity: int,
-         dt: float,
-         thickness: int=8,
-         epsilon_eff: float=1,
-         ) -> Dict:
-    """
-    Generate source code for complex phase matched layer (cpml) absorbing boundaries.
-    These are not full boundary conditions and require a conducting boundary to be added
-     in the same direction as the pml.
-
-    :param direction: integer in range(3), corresponding to x, y, z directions.
-    :param polarity: -1 or 1, corresponding to eg. -x or +x direction.
-    :param dt: timestep used by the simulation
-    :param thickness: Number of cells used by the pml (the grid is NOT expanded to add these cells). Default 8.
-    :param epsilon_eff: Effective epsilon_r of the pml layer. Default 1.
-    :return: Dict with entries 'E', 'H' (update equations for E and H) and 'psi_E', 'psi_H' (lists of str,
-            specifying the field names of the cpml fields used in the E and H update steps. Eg.,
-            Psi_xn_Ex for the complex Ex component for the x- pml.)
-    """
-    if direction not in range(3):
-        raise Exception('Invalid direction: {}'.format(direction))
-
-    if polarity not in (-1, 1):
-        raise Exception('Invalid polarity: {}'.format(polarity))
-
-    if thickness <= 2:
-        raise Exception('It would be wise to have a pml with 4+ cells of thickness')
-
-    if epsilon_eff <= 0:
-        raise Exception('epsilon_eff must be positive')
-
-    m = (3.5, 1)
-    sigma_max = 0.8 * (m[0] + 1) / numpy.sqrt(epsilon_eff)
-    alpha_max = 0  # TODO: Decide what to do about non-zero alpha
-    transverse = numpy.delete(range(3), direction)
-
-    r = 'xyz'[direction]
-    np = 'nVp'[numpy.sign(polarity)+1]
-    uv = ['xyz'[i] for i in transverse]
-
-    xe = numpy.arange(1, thickness+1, dtype=float)[::-1]
-    xh = numpy.arange(1, thickness+1, dtype=float)[::-1]
-    if polarity > 0:
-        xe -= 0.5
-    elif polarity < 0:
-        xh -= 0.5
-
-    def par(x):
-        sigma = ((x / thickness) ** m[0]) * sigma_max
-        alpha = ((1 - x / thickness) ** m[1]) * alpha_max
-        p0 = numpy.exp(-(sigma + alpha) * dt)
-        p1 = sigma / (sigma + alpha) * (p0 - 1)
-        return p0, p1
-    p0e, p1e = par(xe)
-    p0h, p1h = par(xh)
-
-    vals = {'r': r,
-            'u': uv[0],
-            'v': uv[1],
-            'np':   np,
-            'th':   thickness,
-            'p0e': ', '.join((str(x) for x in p0e)),
-            'p1e': ', '.join((str(x) for x in p1e)),
-            'p0h': ', '.join((str(x) for x in p0h)),
-            'p1h': ', '.join((str(x) for x in p1h)),
-            'se': '-+'[direction % 2],
-            'sh': '+-'[direction % 2]}
-
-    if polarity < 0:
-        bounds_if = """
-if ( 0 < {r} && {r} < {th} + 1 ) {{
-    const int ir = {r} - 1;                             // index into pml parameters
-    const int ip = {v} + {u} * s{v} + ir * s{v} * s{u};   // linear index into Psi
-"""
-    elif polarity > 0:
-        bounds_if = """
-if ( (s{r} - 1) > {r} && {r} > (s{r} - 1) - ({th} + 1) ) {{
-    const int ir = (s{r} - 1) - ({r} + 1);              // index into pml parameters
-    const int ip = {v} + {u} * s{v} + ir * s{v} * s{u};   // linear index into Psi
-"""
-    else:
-        raise Exception('Bad polarity (=0)')
-
-    code_e = """
-    // pml parameters:
-    const float p0[{th}] = {{ {p0e} }};
-    const float p1[{th}] = {{ {p1e} }};
-
-    Psi_{r}{np}_E{u}[ip] = p0[ir] * Psi_{r}{np}_E{u}[ip] + p1[ir] * (H{v}[i] - H{v}[i-di{r}]);
-    Psi_{r}{np}_E{v}[ip] = p0[ir] * Psi_{r}{np}_E{v}[ip] + p1[ir] * (H{u}[i] - H{u}[i-di{r}]);
-
-    E{u}[i] {se}= dt / eps{u}[i] * Psi_{r}{np}_E{u}[ip];
-    E{v}[i] {sh}= dt / eps{v}[i] * Psi_{r}{np}_E{v}[ip];
-}}
-"""
-    code_h = """
-    // pml parameters:
-    const float p0[{th}] = {{ {p0h} }};
-    const float p1[{th}] = {{ {p1h} }};
-
-    Psi_{r}{np}_H{u}[ip] = p0[ir] * Psi_{r}{np}_H{u}[ip] + p1[ir] * (E{v}[i+di{r}] - E{v}[i]);
-    Psi_{r}{np}_H{v}[ip] = p0[ir] * Psi_{r}{np}_H{v}[ip] + p1[ir] * (E{u}[i+di{r}] - E{u}[i]);
-
-    H{u}[i] {sh}= dt * Psi_{r}{np}_H{u}[ip];
-    H{v}[i] {se}= dt * Psi_{r}{np}_H{v}[ip];
-}}
-"""
-
-    pml_data = {
-        'E': (bounds_if + code_e).format(**vals),
-        'H': (bounds_if + code_h).format(**vals),
-        'psi_E': ['Psi_{r}{np}_E{u}'.format(**vals),
-                  'Psi_{r}{np}_E{v}'.format(**vals)],
-        'psi_H': ['Psi_{r}{np}_H{u}'.format(**vals),
-                  'Psi_{r}{np}_H{v}'.format(**vals)],
-    }
-
-    return pml_data
diff --git a/fdtd/kernels/common.cl b/fdtd/kernels/common.cl
new file mode 100644
index 0000000..d203ca7
--- /dev/null
+++ b/fdtd/kernels/common.cl
@@ -0,0 +1,84 @@
+{#
+/* Common code for E, H updates
+ *
+ * Template parameters:
+ *  ftype	type name (e.g. float or double)
+ *  shape       list of 3 ints specifying shape of fields
+ */
+#}
+
+typedef {{ftype}} ftype;
+
+
+/*
+ * Field size info
+ */
+const size_t sx = {{shape[0]}};
+const size_t sy = {{shape[1]}};
+const size_t sz = {{shape[2]}};
+const size_t field_size = sx * sy * sz;
+
+//Since we use i to index into Ex[], Ey[], ... rather than E[], do nothing if
+// i is outside the bounds of Ex[].
+if (i >= field_size) {
+    PYOPENCL_ELWISE_CONTINUE;
+}
+
+
+/*
+ * Array indexing
+ */
+// Given a linear index i and shape (sx, sy, sz), defines x, y, and z
+//  as the 3D indices of the current element (i).
+// (ie, converts linear index [i] to field indices (x, y, z)
+const size_t x = i / (sz * sy);
+const size_t y = (i - x * sz * sy) / sz;
+const size_t z = (i - y * sz - x * sz * sy);
+
+// Calculate linear index offsets corresponding to offsets in 3D
+// (ie, if E[i] <-> E(x, y, z), then E[i + diy] <-> E(x, y + 1, z)
+const size_t dix = sz * sy;
+const size_t diy = sz;
+const size_t diz = 1;
+
+
+/*
+ * Pointer math
+ */
+//Pointer offsets into the components of a linearized vector-field
+// (eg. Hx = H + XX, where H and Hx are pointers)
+const size_t XX = 0;
+const size_t YY = field_size;
+const size_t ZZ = field_size * 2;
+
+//Define pointers to vector components of each field (eg. Hx = H + XX)
+__global ftype *Ex = E + XX;
+__global ftype *Ey = E + YY;
+__global ftype *Ez = E + ZZ;
+
+__global ftype *Hx = H + XX;
+__global ftype *Hy = H + YY;
+__global ftype *Hz = H + ZZ;
+
+
+/*
+ * Implement periodic boundary conditions
+ *
+ * mx ([m]inus [x]) gives the index offset of the adjacent cell in the minus-x direction.
+ * In the event that we start at x == 0, we actually want to wrap around and grab the cell
+ * x_{-1} == (sx - 1) instead, ie. mx = (sx - 1) * dix .
+ *
+ * px ([p]lus [x]) gives the index offset of the adjacent cell in the plus-x direction.
+ * In the event that we start at x == (sx - 1), we actually want to wrap around and grab
+ * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix .
+ */
+{% for r in 'xyz' %}
+int m{{r}} = -di{{r}};
+int p{{r}} = +di{{r}};
+int wrap_{{r}} = (s{{r}} - 1) * di{{r}};
+if ( {{r}} == 0 ) {
+  m{{r}} = wrap_{{r}};
+} else if ( {{r}} == s{{r}} - 1 ) {
+  p{{r}} = -wrap_{{r}};
+} 
+{% endfor %}
diff --git a/fdtd/kernels/update_e.cl b/fdtd/kernels/update_e.cl
new file mode 100644
index 0000000..2728b46
--- /dev/null
+++ b/fdtd/kernels/update_e.cl
@@ -0,0 +1,78 @@
+/*
+ *  Update E-field, including any PMLs.
+ *  
+ *  Template parameters:
+ *   common_header: Rendered contents of common.cl
+ *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
+ *   pml_thickness: Number of cells (integer)
+ *   
+ *  OpenCL args:
+ *   E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E]
+ */
+
+{{common_header}}
+
+////////////////////////////////////////////////////////////////////////////
+
+__global ftype *epsx = eps + XX;
+__global ftype *epsy = eps + YY;
+__global ftype *epsz = eps + ZZ;
+
+{% if pmls -%}
+const int pml_thickness = {{pml_thickness}};
+{%- endif %}
+
+/*
+ *   Precalclate derivatives
+ */
+ftype dHxy = Hx[i] - Hx[i + my];
+ftype dHxz = Hx[i] - Hx[i + mz];
+
+ftype dHyx = Hy[i] - Hy[i + mx];
+ftype dHyz = Hy[i] - Hy[i + mz];
+
+ftype dHzx = Hz[i] - Hz[i + mx];
+ftype dHzy = Hz[i] - Hz[i + my];
+
+/*
+ *   PML Update
+ */
+// PML effects on E
+ftype pExi = 0;
+ftype pEyi = 0;
+ftype pEzi = 0;
+
+{% for r, p in pmls -%}
+    {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
+    {%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%}
+    {%- if r != 'y' -%}
+        {%- set se, sh = '-', '+' -%}
+    {%- else -%}
+        {%- set se, sh = '+', '-' -%}
+    {%- endif -%}
+
+    {%- if p == 'n' %}
+
+if ( {{r}} < pml_thickness ) {
+    const size_t ir = {{r}};                          // index into pml parameters
+
+    {%- elif p == 'p' %}
+
+if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
+    const size_t ir = (s{{r}} - 1) - {{r}};                     // index into pml parameters
+
+    {%- endif %}
+    const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
+    {{psi ~ u}}[ip] = p0e{{p}}[ir] * {{psi ~ u}}[ip] + p1e{{p}}[ir] * dH{{v ~ r}};
+    {{psi ~ v}}[ip] = p0e{{p}}[ir] * {{psi ~ v}}[ip] + p1e{{p}}[ir] * dH{{u ~ r}};
+    pE{{u}}i {{se}}= {{psi ~ u}}[ip];
+    pE{{v}}i {{sh}}= {{psi ~ v}}[ip];
+}
+{%- endfor %}
+
+/*
+ *  Update E
+ */
+Ex[i] += dt / epsx[i] * (dHzy - dHyz + pExi);
+Ey[i] += dt / epsy[i] * (dHxz - dHzx + pEyi);
+Ez[i] += dt / epsz[i] * (dHyx - dHxy + pEzi);
diff --git a/fdtd/kernels/update_h.cl b/fdtd/kernels/update_h.cl
new file mode 100644
index 0000000..e093cb9
--- /dev/null
+++ b/fdtd/kernels/update_h.cl
@@ -0,0 +1,125 @@
+/*
+ *  Update H-field, including any PMLs.
+ *   Also precalculate values for poynting vector if necessary.
+ *  
+ *  Template parameters:
+ *   common_header: Rendered contents of common.cl
+ *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
+ *   pml_thickness: Number of cells (integer)
+ *   do_poynting: Whether to precalculate poynting vector components (boolean)  
+ *
+ *  OpenCL args:
+ *   E, H, dt, [p{01}h{np}, Psi_{xyz}{np}_H], [oS]
+ */
+
+{{common_header}}
+
+////////////////////////////////////////////////////////////////////////////
+
+{% if pmls -%}
+const int pml_thickness = {{pml_thickness}};
+{%- endif %}
+
+/*
+ *  Precalculate derivatives
+ */
+ftype dExy = Ex[i + py] - Ex[i];
+ftype dExz = Ex[i + pz] - Ex[i];
+
+ftype dEyx = Ey[i + px] - Ey[i];
+ftype dEyz = Ey[i + pz] - Ey[i];
+
+ftype dEzx = Ez[i + px] - Ez[i];
+ftype dEzy = Ez[i + py] - Ez[i];
+
+{%- if do_poynting %}
+
+
+/*
+ *  Precalculate averaged E
+ */
+ftype aExy = Ex[i + py] + Ex[i];
+ftype aExz = Ex[i + pz] + Ex[i];
+
+ftype aEyx = Ey[i + px] + Ey[i];
+ftype aEyz = Ey[i + pz] + Ey[i];
+
+ftype aEzx = Ez[i + px] + Ez[i];
+ftype aEzy = Ez[i + py] + Ez[i];
+{%- endif %}
+
+
+/*
+ *  PML Update
+ */
+// PML contributions to H
+ftype pHxi = 0;
+ftype pHyi = 0;
+ftype pHzi = 0;
+
+{%- for r, p in pmls -%}
+    {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
+    {%- set psi = 'Psi_' ~ r ~ p ~ '_H' -%}
+    {%- if r != 'y' -%}
+        {%- set se, sh = '-', '+' -%}
+    {%- else -%}
+        {%- set se, sh = '+', '-' -%}
+    {%- endif -%}
+
+    {%- if p == 'n' %}
+
+if ( {{r}} < pml_thickness ) {
+    const size_t ir = {{r}};                          // index into pml parameters
+
+    {%- elif p == 'p' %}
+
+if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
+    const size_t ir = (s{{r}} - 1) - {{r}};                     // index into pml parameters
+
+    {%- endif %}
+    const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
+    {{psi ~ u}}[ip] = p0h{{p}}[ir] * {{psi ~ u}}[ip] + p1h{{p}}[ir] * dE{{v ~ r}};
+    {{psi ~ v}}[ip] = p0h{{p}}[ir] * {{psi ~ v}}[ip] + p1h{{p}}[ir] * dE{{u ~ r}};
+    pH{{u}}i {{sh}}= {{psi ~ u}}[ip];
+    pH{{v}}i {{se}}= {{psi ~ v}}[ip];
+}
+{%- endfor %}
+
+/*
+ *  Update H
+ */
+{% if do_poynting -%}
+// Save old H for averaging
+ftype Hx_old = Hx[i];
+ftype Hy_old = Hy[i];
+ftype Hz_old = Hz[i];
+{%- endif %}
+
+// H update equations
+Hx[i] -= dt * (dEzy - dEyz + pHxi);
+Hy[i] -= dt * (dExz - dEzx + pHyi);
+Hz[i] -= dt * (dEyx - dExy + pHzi);
+
+{% if do_poynting -%}
+// Average H across timesteps
+ftype aHxt = Hx[i] + Hx_old;
+ftype aHyt = Hy[i] + Hy_old;
+ftype aHzt = Hz[i] + Hz_old;
+
+/*
+ *  Calculate unscaled S components at H locations
+ */
+__global ftype *oSxy = oS + 0 * field_size;
+__global ftype *oSyz = oS + 1 * field_size;
+__global ftype *oSzx = oS + 2 * field_size;
+__global ftype *oSxz = oS + 3 * field_size;
+__global ftype *oSyx = oS + 4 * field_size;
+__global ftype *oSzy = oS + 5 * field_size;
+
+oSxy[i] = aEyx * aHzt;
+oSxz[i] = -aEzx * aHyt;
+oSyz[i] = aEzy * aHxt;
+oSyx[i] = -aExy * aHzt;
+oSzx[i] = aExz * aHyt;
+oSzy[i] = -aEyz * aHxt;
+{%- endif -%}
diff --git a/fdtd/kernels/update_s.cl b/fdtd/kernels/update_s.cl
new file mode 100644
index 0000000..0310411
--- /dev/null
+++ b/fdtd/kernels/update_s.cl
@@ -0,0 +1,36 @@
+/*
+ *  Update E-field, including any PMLs.
+ *  
+ *  Template parameters:
+ *   common_header: Rendered contents of common.cl
+ *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
+ *   pml_thickness: Number of cells (integer)
+ *   
+ *  OpenCL args:
+ *   E, H, dt, S, oS
+ */
+
+{{common_header}}
+
+//////////////////////////////////////////////////////////////////////
+
+
+/*
+ * Calculate S from oS (pre-calculated components)
+ */ 
+__global ftype *Sx = S + XX;
+__global ftype *Sy = S + YY;
+__global ftype *Sz = S + ZZ;
+
+// Use unscaled S components from H locations 
+__global ftype *oSxy = oS + 0 * field_size;
+__global ftype *oSyz = oS + 1 * field_size;
+__global ftype *oSzx = oS + 2 * field_size;
+__global ftype *oSxz = oS + 3 * field_size;
+__global ftype *oSyx = oS + 4 * field_size;
+__global ftype *oSzy = oS + 5 * field_size;
+
+ftype s_factor = dt * 0.125;
+Sx[i] = (oSxy[i] + oSxz[i] + oSxy[i + my] + oSxz[i + mz]) * s_factor;
+Sy[i] = (oSyz[i] + oSyx[i] + oSyz[i + mz] + oSyx[i + mx]) * s_factor;
+Sz[i] = (oSzx[i] + oSzy[i] + oSzx[i + mx] + oSzy[i + my]) * s_factor;
diff --git a/fdtd/simulation.py b/fdtd/simulation.py
index 227dcb0..3dd9f62 100644
--- a/fdtd/simulation.py
+++ b/fdtd/simulation.py
@@ -3,15 +3,23 @@ Class for constructing and holding the basic FDTD operations and fields
 """
 
 from typing import List, Dict, Callable
+from collections import OrderedDict
 import numpy
+import jinja2
 import warnings
 
 import pyopencl
 import pyopencl.array
 from pyopencl.elementwise import ElementwiseKernel
 
-from . import boundary, base
-from .base import type_to_C
+from fdfd_tools import vec
+
+
+__author__ = 'Jan Petykiewicz'
+
+
+# Create jinja2 env on module load
+jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
 
 
 class Simulation(object):
@@ -20,6 +28,7 @@ class Simulation(object):
     """
     E = None    # type: List[pyopencl.array.Array]
     H = None    # type: List[pyopencl.array.Array]
+    S = None    # type: List[pyopencl.array.Array]
     eps = None  # type: List[pyopencl.array.Array]
     dt = None   # type: float
 
@@ -30,24 +39,20 @@ class Simulation(object):
 
     update_E = None     # type: Callable[[],pyopencl.Event]
     update_H = None     # type: Callable[[],pyopencl.Event]
-
-    conductor_E = None  # type: Callable[[],pyopencl.Event]
-    conductor_H = None  # type: Callable[[],pyopencl.Event]
-
-    cpml_E = None       # type: Callable[[],pyopencl.Event]
-    cpml_H = None       # type: Callable[[],pyopencl.Event]
-
-    cpml_psi_E = None   # type: Dict[str, pyopencl.array.Array]
-    cpml_psi_H = None   # type: Dict[str, pyopencl.array.Array]
+    update_S = None     # type: Callable[[],pyopencl.Event]
+    sources = None      # type: Dict[str, str]
 
     def __init__(self,
                  epsilon: List[numpy.ndarray],
-                 dt: float=.99/numpy.sqrt(3),
-                 initial_E: List[numpy.ndarray]=None,
-                 initial_H: List[numpy.ndarray]=None,
-                 context: pyopencl.Context=None,
-                 queue: pyopencl.CommandQueue=None,
-                 float_type: numpy.float32 or numpy.float64=numpy.float32):
+                 dt: float = .99/numpy.sqrt(3),
+                 initial_E: List[numpy.ndarray] = None,
+                 initial_H: List[numpy.ndarray] = None,
+                 context: pyopencl.Context = None,
+                 queue: pyopencl.CommandQueue = None,
+                 float_type: numpy.float32 or numpy.float64 = numpy.float32,
+                 pml_thickness: int = 10,
+                 pmls: List[List[str]] = None,
+                 do_poynting: bool = True):
         """
         Initialize the simulation.
 
@@ -67,7 +72,7 @@ class Simulation(object):
             Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
 
         if context is None:
-            self.context = pyopencl.create_some_context(False)
+            self.context = pyopencl.create_some_context()
         else:
             self.context = context
 
@@ -84,126 +89,149 @@ class Simulation(object):
             self.dt = dt
 
         self.arg_type = float_type
-
-        self.eps = [pyopencl.array.to_device(self.queue, e.astype(float_type)) for e in epsilon]
+        self.sources = {}
+        self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(float_type))
 
         if initial_E is None:
-            self.E = [pyopencl.array.zeros_like(self.eps[0]) for _ in range(3)]
+            self.E = pyopencl.array.zeros_like(self.eps)
         else:
             if len(initial_E) != 3:
                 Exception('Initial_E must be a list of length 3')
             if not all((E.shape == epsilon[0].shape for E in initial_E)):
                 Exception('Initial_E list elements must have same shape as epsilon elements')
-            self.E = [pyopencl.array.to_device(self.queue, E.astype(float_type)) for E in initial_E]
+            self.E = pyopencl.array.to_device(self.queue, vec(E).astype(float_type))
 
         if initial_H is None:
-            self.H = [pyopencl.array.zeros_like(self.eps[0]) for _ in range(3)]
+            self.H = pyopencl.array.zeros_like(self.eps)
         else:
             if len(initial_H) != 3:
                 Exception('Initial_H must be a list of length 3')
             if not all((H.shape == epsilon[0].shape for H in initial_H)):
                 Exception('Initial_H list elements must have same shape as epsilon elements')
-            self.H = [pyopencl.array.to_device(self.queue, H.astype(float_type)) for H in initial_H]
+            self.H = pyopencl.array.to_device(self.queue, vec(H).astype(float_type))
+
+        if pmls is None:
+            pmls = [[d, p] for d in 'xyz' for p in 'np']
 
         ctype = type_to_C(self.arg_type)
-        E_args = [ctype + ' *E' + c for c in 'xyz']
-        H_args = [ctype + ' *H' + c for c in 'xyz']
-        eps_args = [ctype + ' *eps' + c for c in 'xyz']
-        dt_arg = [ctype + ' dt']
+        def ptr(arg: str) -> str:
+            return ctype + ' *' + arg
 
-        sxyz = base.shape_source(epsilon[0].shape)
-        E_source = sxyz + base.dixyz_source + base.maxwell_E_source
-        H_source = sxyz + base.dixyz_source + base.maxwell_H_source
+        base_fields = OrderedDict()
+        base_fields[ptr('E')] = self.E
+        base_fields[ptr('H')] = self.H
+        base_fields[ctype + ' dt'] = self.dt
 
+        eps_field = OrderedDict()
+        eps_field[ptr('eps')] = self.eps
+
+        common_source = jinja_env.get_template('common.cl').render(
+                ftype=ctype,
+                shape=epsilon[0].shape,
+                )
+        jinja_args = {
+                'common_header': common_source,
+                'pml_thickness': pml_thickness,
+                'pmls': pmls,
+                'do_poynting': do_poynting,
+                }
+        E_source = jinja_env.get_template('update_e.cl').render(**jinja_args)
+        H_source = jinja_env.get_template('update_h.cl').render(**jinja_args)
+
+        self.sources['E'] = E_source
+        self.sources['H'] = H_source
+
+        if do_poynting:
+            S_source = jinja_env.get_template('update_s.cl').render(**jinja_args)
+            self.sources['S'] = S_source
+
+            self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=float_type)
+            self.S = pyopencl.array.zeros_like(self.E)
+            S_fields = OrderedDict()
+            S_fields[ptr('oS')] = self.oS
+            S_fields[ptr('S')] = self.S
+        else:
+            S_fields = OrderedDict() 
+
+        '''
+        PML
+        '''
+        m = (3.5, 1)
+        sigma_max = 0.8 * (m[0] + 1) / numpy.sqrt(1.0) # TODO: epsilon_eff (not 1.0)
+        alpha_max = 0  # TODO: Decide what to do about non-zero alpha
+
+        def par(x):
+            sigma = ((x / pml_thickness) ** m[0]) * sigma_max
+            alpha = ((1 - x / pml_thickness) ** m[1]) * alpha_max
+            p0 = numpy.exp(-(sigma + alpha) * dt)
+            p1 = sigma / (sigma + alpha) * (p0 - 1)
+            return p0, p1
+        
+        xen, xep, xhn, xhp = (numpy.arange(1, pml_thickness + 1, dtype=float_type)[::-1] for _ in range(4))
+        xep -= 0.5
+        xhn -= 0.5
+
+        pml_p_names = [['p' + a + eh + np for np in 'np' for a in '01'] for eh in 'eh']
+        pml_e_fields = OrderedDict()
+        pml_h_fields = OrderedDict()
+        for ne, nh, pe, ph in zip(*pml_p_names, par(xen) + par(xep), par(xhn) + par(xhp)):
+            pml_e_fields[ptr(ne)] = pyopencl.array.to_device(self.queue, pe)
+            pml_h_fields[ptr(nh)] = pyopencl.array.to_device(self.queue, ph)
+
+        for pml in pmls:
+            uv = 'xyz'.replace(pml[0], '')
+            psi_base = 'Psi_' + ''.join(pml) + '_'
+            psi_names = [[psi_base + eh + c for c in uv] for eh in 'EH']
+
+            psi_shape = list(epsilon[0].shape)
+            psi_shape['xyz'.find(pml[0])] = pml_thickness
+
+            for ne, nh in zip(*psi_names):
+                pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
+                pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
+        
+        self.pml_e_fields = pml_e_fields
+        self.pml_h_fields = pml_h_fields
+
+
+        '''
+        Create operations
+        '''
+        E_args = OrderedDict()
+        [E_args.update(d) for d in (base_fields, eps_field, pml_e_fields)]
         E_update = ElementwiseKernel(self.context, operation=E_source,
-                                     arguments=', '.join(E_args + H_args + dt_arg + eps_args))
+                                     arguments=', '.join(E_args.keys()))
 
+        H_args = OrderedDict()
+        [H_args.update(d) for d in (base_fields, pml_h_fields, S_fields)]
         H_update = ElementwiseKernel(self.context, operation=H_source,
-                                     arguments=', '.join(E_args + H_args + dt_arg))
+                                     arguments=', '.join(H_args.keys()))
+        self.update_E = lambda e: E_update(*E_args.values(), wait_for=e)
+        self.update_H = lambda e: H_update(*H_args.values(), wait_for=e)
 
-        self.update_E = lambda e: E_update(*self.E, *self.H, self.dt, *self.eps, wait_for=e)
-        self.update_H = lambda e: H_update(*self.E, *self.H, self.dt, wait_for=e)
+        if do_poynting:
+            S_args = OrderedDict()
+            [S_args.update(d) for d in (base_fields, S_fields)]
+            S_update = ElementwiseKernel(self.context, operation=S_source,
+                                         arguments=', '.join(S_args.keys()))
 
-    def init_cpml(self, pml_args: List[Dict]):
-        """
-        Initialize absorbing layers (cpml: complex phase matched layer). PMLs are not actual
-         boundary conditions, so you should add a conducting boundary (.init_conductors()) for
-         all directions in which you add PMLs.
-        Allows use of self.cpml_E(events) and self.cpml_H(events).
-        All necessary additional fields are created on the opencl device.
+            self.update_S = lambda e: S_update(*S_args.values(), wait_for=e)
 
-        :param pml_args: A list containing dictionaries which are passed to .boundary.cpml(...).
-            The dt argument is set automatically, but the others must be passed in each entry
-             of pml_args.
-        """
-        sxyz = base.shape_source(self.eps[0].shape)
 
-        # Prepare per-iteration constants for later use
-        pml_E_source = sxyz + base.dixyz_source + base.xyz_source
-        pml_H_source = sxyz + base.dixyz_source + base.xyz_source
+def type_to_C(float_type) -> str:
+    """
+    Returns a string corresponding to the C equivalent of a numpy type.
+    Only works for float16, float32, float64.
 
-        psi_E = []
-        psi_H = []
-        psi_E_names = []
-        psi_H_names = []
-        for arg_set in pml_args:
-            pml_data = boundary.cpml(dt=self.dt, **arg_set)
-
-            pml_E_source += pml_data['E']
-            pml_H_source += pml_data['H']
-
-            ti = numpy.delete(range(3), arg_set['direction'])
-            trans = [self.eps[0].shape[i] for i in ti]
-            psi_shape = (8, trans[0], trans[1])
-
-            psi_E += [pyopencl.array.zeros(self.queue, psi_shape, dtype=self.arg_type)
-                      for _ in pml_data['psi_E']]
-            psi_H += [pyopencl.array.zeros(self.queue, psi_shape, dtype=self.arg_type)
-                      for _ in pml_data['psi_H']]
-
-            psi_E_names += pml_data['psi_E']
-            psi_H_names += pml_data['psi_H']
-
-        ctype = type_to_C(self.arg_type)
-        E_args = [ctype + ' *E' + c for c in 'xyz']
-        H_args = [ctype + ' *H' + c for c in 'xyz']
-        eps_args = [ctype + ' *eps' + c for c in 'xyz']
-        dt_arg = [ctype + ' dt']
-        arglist_E = [ctype + ' *' + psi for psi in psi_E_names]
-        arglist_H = [ctype + ' *' + psi for psi in psi_H_names]
-        pml_E_args = ', '.join(E_args + H_args + dt_arg + eps_args + arglist_E)
-        pml_H_args = ', '.join(E_args + H_args + dt_arg + arglist_H)
-
-        pml_E = ElementwiseKernel(self.context, arguments=pml_E_args, operation=pml_E_source)
-        pml_H = ElementwiseKernel(self.context, arguments=pml_H_args, operation=pml_H_source)
-
-        self.cpml_E = lambda e: pml_E(*self.E, *self.H, self.dt, *self.eps, *psi_E, wait_for=e)
-        self.cpml_H = lambda e: pml_H(*self.E, *self.H, self.dt, *psi_H, wait_for=e)
-        self.cmpl_psi_E = {k: v for k, v in zip(psi_E_names, psi_E)}
-        self.cmpl_psi_H = {k: v for k, v in zip(psi_H_names, psi_H)}
-
-    def init_conductors(self, conductor_args: List[Dict]):
-        """
-        Initialize reflecting boundary conditions.
-        Allows use of self.conductor_E(events) and self.conductor_H(events).
-
-        :param conductor_args: List of dictionaries with which to call .boundary.conductor(...).
-        """
-
-        sxyz = base.shape_source(self.eps[0].shape)
-
-        # Prepare per-iteration constants
-        bc_E_source = sxyz + base.dixyz_source + base.xyz_source
-        bc_H_source = sxyz + base.dixyz_source + base.xyz_source
-        for arg_set in conductor_args:
-            [e, h] = boundary.conductor(**arg_set)
-            bc_E_source += e
-            bc_H_source += h
-
-        E_args = [type_to_C(self.arg_type) + ' *E' + c for c in 'xyz']
-        H_args = [type_to_C(self.arg_type) + ' *H' + c for c in 'xyz']
-        bc_E = ElementwiseKernel(self.context, arguments=E_args, operation=bc_E_source)
-        bc_H = ElementwiseKernel(self.context, arguments=H_args, operation=bc_H_source)
-
-        self.conductor_E = lambda e: bc_E(*self.E, wait_for=e)
-        self.conductor_H = lambda e: bc_H(*self.H, wait_for=e)
+    :param float_type: e.g. numpy.float32
+    :return: string containing the corresponding C type (eg. 'double')
+    """
+    if float_type == numpy.float16:
+        arg_type = 'half'
+    elif float_type == numpy.float32:
+        arg_type = 'float'
+    elif float_type == numpy.float64:
+        arg_type = 'double'
+    else:
+        raise Exception('Unsupported type')
+    return arg_type
diff --git a/requirements.txt b/requirements.txt
index 6121670..6ed651b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,8 @@
 numpy
 h5py
 pyopencl
+jinja2
 -e git+https://mpxd.net/gogs/jan/float_raster.git@release#egg=float_raster
 -e git+https://mpxd.net/gogs/jan/gridlock.git@release#egg=gridlock
 -e git+https://mpxd.net/gogs/jan/masque.git@release#egg=masque
+-e git+https://mpxd.net/gogs/jan/fdfd_tools.git@master#egg=fdfd_tools

From c1750e9fde087ad26ecad5434380bbd1f7e7be28 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Wed, 29 Mar 2017 01:09:21 -0700
Subject: [PATCH 05/33] Update readme

---
 README.md | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 5da2b6b..fbc972b 100644
--- a/README.md
+++ b/README.md
@@ -5,28 +5,32 @@ electromagnetic simulations on parallel compute hardware (mainly GPUs).
 
 **Performance** highly depends on what hardware you have available:
 * A 395x345x73 cell simulation (~10 million points, 8-cell absorbing boundaries)
- runs at around 42 iterations/sec. on my Nvidia GTX 580.
-* On my laptop (Nvidia 940M) the same simulation achieves ~8 iterations/sec.
+ runs at around 91 iterations/sec. on my AMD RX480.
+* On an Nvidia GTX 580, it runs at 66 iterations/sec
+* On my laptop (Nvidia 940M) the same simulation achieves ~12 iterations/sec.
 * An L3 photonic crystal cavity ringdown simulation (1550nm source, 40nm
- discretization, 8000 steps) takes about 5 minutes on my laptop.
+ discretization, 8000 steps) takes about 3 minutes on my laptop.
 
 **Capabilities** are currently pretty minimal:
 * Absorbing boundaries (CPML)
-* Conducting boundaries (PMC)
+* Perfect electrical conductors (PECs; to use set epsilon to inf)
 * Anisotropic media (eps_xx, eps_yy, eps_zz, mu_xx, ...)
 * Direct access to fields (eg., you can trivially add a soft or hard
- current source with just sim.E[1] += sin(f0 * t), or save any portion
+ current source with just sim.E[ind] += sin(f0 * t), or save any portion
  of a field to a file)
 
+
 ## Installation
 
 **Requirements:**
 * python 3 (written and tested with 3.5)
 * numpy
 * pyopencl
-* h5py (for file output)
+* jinja2
+* dill (for file output)
 * [gridlock](https://mpxd.net/gogs/jan/gridlock)
 * [masque](https://mpxd.net/gogs/jan/masque)
+* [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools)
 
 To get the code, just clone this repository:
 ```bash

From 97d9901e4b0356d4a8ab77d65013e7f5b73bc009 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sat, 12 Aug 2017 19:20:29 -0700
Subject: [PATCH 06/33] Use logging package for output

---
 fdtd.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fdtd.py b/fdtd.py
index bbdabec..961bc0e 100644
--- a/fdtd.py
+++ b/fdtd.py
@@ -6,6 +6,7 @@ See main() for simulation setup.
 
 import sys
 import time
+import logging
 
 import numpy
 import lzma 
@@ -20,6 +21,9 @@ import fdfd_tools
 
 __author__ = 'Jan Petykiewicz'
 
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger(__name__)
+
 
 def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern:
     """
@@ -120,7 +124,7 @@ def main():
                        eps=n_air**2,
                        polygons=mask.as_polygons())
 
-    print('grid shape: {}'.format(grid.shape))
+    logger.info('grid shape: {}'.format(grid.shape))
     # #### Create the simulation grid ####
     sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8)
 
@@ -157,7 +161,7 @@ def main():
         e.wait()
 
         if t % 100 == 0:
-            print('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start)))
+            logger.info('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start)))
             sys.stdout.flush()
 
     with lzma.open('saved_simulation', 'wb') as f:

From 0d91f0d43efba28e91dc1a88b5f3acab9294b799 Mon Sep 17 00:00:00 2001
From: jan <jan@mpxd.net>
Date: Mon, 14 Aug 2017 15:41:20 -0700
Subject: [PATCH 07/33] rename lib

---
 fdtd.py                                   | 7 ++++---
 fdtd/__init__.py                          | 0
 opencl_fdtd/__init__.py                   | 1 +
 {fdtd => opencl_fdtd}/kernels/common.cl   | 0
 {fdtd => opencl_fdtd}/kernels/update_e.cl | 0
 {fdtd => opencl_fdtd}/kernels/update_h.cl | 0
 {fdtd => opencl_fdtd}/kernels/update_s.cl | 0
 {fdtd => opencl_fdtd}/simulation.py       | 6 +++---
 8 files changed, 8 insertions(+), 6 deletions(-)
 delete mode 100644 fdtd/__init__.py
 create mode 100644 opencl_fdtd/__init__.py
 rename {fdtd => opencl_fdtd}/kernels/common.cl (100%)
 rename {fdtd => opencl_fdtd}/kernels/update_e.cl (100%)
 rename {fdtd => opencl_fdtd}/kernels/update_h.cl (100%)
 rename {fdtd => opencl_fdtd}/kernels/update_s.cl (100%)
 rename {fdtd => opencl_fdtd}/simulation.py (99%)

diff --git a/fdtd.py b/fdtd.py
index bbdabec..13af0fd 100644
--- a/fdtd.py
+++ b/fdtd.py
@@ -8,10 +8,10 @@ import sys
 import time
 
 import numpy
-import lzma 
+import lzma
 import dill
 
-from fdtd.simulation import Simulation
+from opencl_fdtd import Simulation
 from masque import Pattern, shapes
 import gridlock
 import pcgen
@@ -133,7 +133,7 @@ def main():
     def field_source(i):
         t0 = i * sim.dt - delay
         return numpy.sin(w * t0) * numpy.exp(-alpha * t0**2)
-   
+
     with open('sources.c', 'w') as f:
         f.write(sim.sources['E'])
         f.write('\n==========================================\n')
@@ -172,5 +172,6 @@ def main():
             d['S'] = unvec(sim.S.get())
         dill.dump(d, f)
 
+
 if __name__ == '__main__':
     main()
diff --git a/fdtd/__init__.py b/fdtd/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/opencl_fdtd/__init__.py b/opencl_fdtd/__init__.py
new file mode 100644
index 0000000..e3e95b4
--- /dev/null
+++ b/opencl_fdtd/__init__.py
@@ -0,0 +1 @@
+from .simulation import Simulation, type_to_C
diff --git a/fdtd/kernels/common.cl b/opencl_fdtd/kernels/common.cl
similarity index 100%
rename from fdtd/kernels/common.cl
rename to opencl_fdtd/kernels/common.cl
diff --git a/fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl
similarity index 100%
rename from fdtd/kernels/update_e.cl
rename to opencl_fdtd/kernels/update_e.cl
diff --git a/fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
similarity index 100%
rename from fdtd/kernels/update_h.cl
rename to opencl_fdtd/kernels/update_h.cl
diff --git a/fdtd/kernels/update_s.cl b/opencl_fdtd/kernels/update_s.cl
similarity index 100%
rename from fdtd/kernels/update_s.cl
rename to opencl_fdtd/kernels/update_s.cl
diff --git a/fdtd/simulation.py b/opencl_fdtd/simulation.py
similarity index 99%
rename from fdtd/simulation.py
rename to opencl_fdtd/simulation.py
index f1a8dec..77a95df 100644
--- a/fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -152,7 +152,7 @@ class Simulation(object):
             S_fields[ptr('oS')] = self.oS
             S_fields[ptr('S')] = self.S
         else:
-            S_fields = OrderedDict() 
+            S_fields = OrderedDict()
 
         '''
         PML
@@ -167,7 +167,7 @@ class Simulation(object):
             p0 = numpy.exp(-(sigma + alpha) * dt)
             p1 = sigma / (sigma + alpha) * (p0 - 1)
             return p0, p1
-        
+
         xen, xep, xhn, xhp = (numpy.arange(1, pml_thickness + 1, dtype=float_type)[::-1] for _ in range(4))
         xep -= 0.5
         xhn -= 0.5
@@ -190,7 +190,7 @@ class Simulation(object):
             for ne, nh in zip(*psi_names):
                 pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
                 pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
-        
+
         self.pml_e_fields = pml_e_fields
         self.pml_h_fields = pml_h_fields
 

From a85f5477490f8f175dc42e2e61a67026b4c9063a Mon Sep 17 00:00:00 2001
From: jan <jan@mpxd.net>
Date: Thu, 24 Aug 2017 11:28:03 -0700
Subject: [PATCH 08/33] doc updates

---
 README.md                       |  4 ++-
 opencl_fdtd/kernels/update_e.cl |  4 +--
 opencl_fdtd/kernels/update_h.cl |  4 +--
 opencl_fdtd/kernels/update_s.cl |  8 ++---
 opencl_fdtd/simulation.py       | 52 ++++++++++++++++++++++++++++++---
 5 files changed, 59 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index fbc972b..26d5acd 100644
--- a/README.md
+++ b/README.md
@@ -27,10 +27,12 @@ electromagnetic simulations on parallel compute hardware (mainly GPUs).
 * numpy
 * pyopencl
 * jinja2
+* [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools)
+
+Optional (used for examples):
 * dill (for file output)
 * [gridlock](https://mpxd.net/gogs/jan/gridlock)
 * [masque](https://mpxd.net/gogs/jan/masque)
-* [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools)
 
 To get the code, just clone this repository:
 ```bash
diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl
index 2728b46..4bda993 100644
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@@ -1,11 +1,11 @@
 /*
  *  Update E-field, including any PMLs.
- *  
+ *
  *  Template parameters:
  *   common_header: Rendered contents of common.cl
  *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
  *   pml_thickness: Number of cells (integer)
- *   
+ *
  *  OpenCL args:
  *   E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E]
  */
diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index e093cb9..8519b51 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -1,12 +1,12 @@
 /*
  *  Update H-field, including any PMLs.
  *   Also precalculate values for poynting vector if necessary.
- *  
+ *
  *  Template parameters:
  *   common_header: Rendered contents of common.cl
  *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
  *   pml_thickness: Number of cells (integer)
- *   do_poynting: Whether to precalculate poynting vector components (boolean)  
+ *   do_poynting: Whether to precalculate poynting vector components (boolean)
  *
  *  OpenCL args:
  *   E, H, dt, [p{01}h{np}, Psi_{xyz}{np}_H], [oS]
diff --git a/opencl_fdtd/kernels/update_s.cl b/opencl_fdtd/kernels/update_s.cl
index 0310411..94e061e 100644
--- a/opencl_fdtd/kernels/update_s.cl
+++ b/opencl_fdtd/kernels/update_s.cl
@@ -1,11 +1,11 @@
 /*
  *  Update E-field, including any PMLs.
- *  
+ *
  *  Template parameters:
  *   common_header: Rendered contents of common.cl
  *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
  *   pml_thickness: Number of cells (integer)
- *   
+ *
  *  OpenCL args:
  *   E, H, dt, S, oS
  */
@@ -17,12 +17,12 @@
 
 /*
  * Calculate S from oS (pre-calculated components)
- */ 
+ */
 __global ftype *Sx = S + XX;
 __global ftype *Sy = S + YY;
 __global ftype *Sz = S + ZZ;
 
-// Use unscaled S components from H locations 
+// Use unscaled S components from H locations
 __global ftype *oSxy = oS + 0 * field_size;
 __global ftype *oSyz = oS + 1 * field_size;
 __global ftype *oSzx = oS + 2 * field_size;
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 77a95df..7a49f39 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -25,6 +25,35 @@ jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
 class Simulation(object):
     """
     Constructs and holds the basic FDTD operations and related fields
+
+    After constructing this object, call the (update_E, update_H, update_S) members
+     to perform FDTD updates on the stored (E, H, S) fields:
+
+        sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8)
+        with open('sources.c', 'w') as f:
+            f.write('{}'.format(sim.sources))
+
+        for t in range(max_t):
+            sim.update_E([]).wait()
+
+            # Find the linear index for the center point, for Ey
+            ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + \
+                    numpy.prod(grid.shape) * 1
+            # Perturb the field (i.e., add a soft current source)
+            sim.E[ind] += numpy.sin(omega * t * sim.dt)
+            event = sim.update_H([])
+            if sim.update_S:
+                event = sim.update_S([event])
+            event.wait()
+
+            with lzma.open('saved_simulation', 'wb') as f:
+                dill.dump(fdfd_tools.unvec(sim.E.get(), grid.shape), f)
+
+    Code in the form
+        event2 = sim.update_H([event0, event1])
+     indicates that the update_H operation should be prepared immediately, but wait for
+     event0 and event1 to occur (i.e. previous operations to finish) before starting execution.
+     event2 can then be used to prepare further operations to be run after update_H.
     """
     E = None    # type: List[pyopencl.array.Array]
     H = None    # type: List[pyopencl.array.Array]
@@ -37,9 +66,9 @@ class Simulation(object):
     context = None      # type: pyopencl.Context
     queue = None        # type: pyopencl.CommandQueue
 
-    update_E = None     # type: Callable[[],pyopencl.Event]
-    update_H = None     # type: Callable[[],pyopencl.Event]
-    update_S = None     # type: Callable[[],pyopencl.Event]
+    update_E = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
+    update_H = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
+    update_S = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
     sources = None      # type: Dict[str, str]
 
     def __init__(self,
@@ -50,8 +79,8 @@ class Simulation(object):
                  context: pyopencl.Context = None,
                  queue: pyopencl.CommandQueue = None,
                  float_type: numpy.float32 or numpy.float64 = numpy.float32,
-                 pml_thickness: int = 10,
                  pmls: List[List[str]] = None,
+                 pml_thickness: int = 10,
                  do_poynting: bool = True):
         """
         Initialize the simulation.
@@ -64,6 +93,21 @@ class Simulation(object):
         :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
         :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
         :param float_type: numpy.float32 or numpy.float64. Default numpy.float32.
+        :param pmls: List of [axis, direction] pairs which specify simluation boundaries to be
+                'coated' with a PML (absorbing layer). Axis should be one of 'x', 'y', 'z', and
+                direction should be one of 'n', 'p' (i.e., negative, positive).
+                Default is to apply PMLs to all six boundaries.
+        :param pml_thickness: Thickness of any PMLs, in number of grid cells. Default 10.
+        :param do_poynting: If true, enables calculation of the poynting vector, S.
+                Poynting vector calculation adds the following computational burdens:
+                    * During update_H, ~6 extra additions/cell are performed in order to spatially
+                        average E and temporally average H. These quantities are multiplied
+                        (6 multiplications/cell) and then stored (6 writes/cell, cache-friendly).
+                    * update_S performs a discrete cross product using the precalculated products
+                        from update_H. This is not nice to the cache and similar to e.g. update_E
+                        in complexity.
+                    * GPU memory requirements are approximately doubled, since S and the intermediate
+                        products must be stored.
         """
 
         if len(epsilon) != 3:

From d02cd18403e8c07b8c3de7a10b335fec180a148a Mon Sep 17 00:00:00 2001
From: jan <jan@mpxd.net>
Date: Sun, 1 Oct 2017 12:34:11 -0700
Subject: [PATCH 09/33] improve pml specification

---
 fdtd.py                         |  4 +-
 opencl_fdtd/kernels/update_e.cl | 17 +++----
 opencl_fdtd/kernels/update_h.cl | 20 ++++----
 opencl_fdtd/simulation.py       | 82 +++++++++++++++++++--------------
 4 files changed, 69 insertions(+), 54 deletions(-)

diff --git a/fdtd.py b/fdtd.py
index 5d0f4c3..08606c6 100644
--- a/fdtd.py
+++ b/fdtd.py
@@ -126,7 +126,9 @@ def main():
 
     logger.info('grid shape: {}'.format(grid.shape))
     # #### Create the simulation grid ####
-    sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8)
+    pmls = [{'axis': a, 'polarity': p, 'thickness': pml_thickness}
+            for a in 'xyz' for p in 'np']
+    sim = Simulation(grid.grids, do_poynting=True, pmls=pmls)
 
     # Source parameters and function
     w = 2 * numpy.pi * dx / wl
diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl
index 4bda993..f7304e7 100644
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@@ -3,8 +3,8 @@
  *
  *  Template parameters:
  *   common_header: Rendered contents of common.cl
- *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
- *   pml_thickness: Number of cells (integer)
+ *   pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
+        axes, polarities, and thicknesses.
  *
  *  OpenCL args:
  *   E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E]
@@ -18,9 +18,6 @@ __global ftype *epsx = eps + XX;
 __global ftype *epsy = eps + YY;
 __global ftype *epsz = eps + ZZ;
 
-{% if pmls -%}
-const int pml_thickness = {{pml_thickness}};
-{%- endif %}
 
 /*
  *   Precalclate derivatives
@@ -42,7 +39,9 @@ ftype pExi = 0;
 ftype pEyi = 0;
 ftype pEzi = 0;
 
-{% for r, p in pmls -%}
+{% for pml in pmls -%}
+    {%- set r = pml['axis'] -%}
+    {%- set p = pml['polarity'] -%}
     {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
     {%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%}
     {%- if r != 'y' -%}
@@ -51,14 +50,16 @@ ftype pEzi = 0;
         {%- set se, sh = '+', '-' -%}
     {%- endif -%}
 
+pml_{{r ~ p}}_thickness = {{pml['thickness']}};
+
     {%- if p == 'n' %}
 
-if ( {{r}} < pml_thickness ) {
+if ( {{r}} < pml_{{r ~ p}_thickness ) {
     const size_t ir = {{r}};                          // index into pml parameters
 
     {%- elif p == 'p' %}
 
-if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
+if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}_thickness ) {
     const size_t ir = (s{{r}} - 1) - {{r}};                     // index into pml parameters
 
     {%- endif %}
diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index 8519b51..7c648b6 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -4,22 +4,18 @@
  *
  *  Template parameters:
  *   common_header: Rendered contents of common.cl
- *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
- *   pml_thickness: Number of cells (integer)
+ *   pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
+        axes, polarities, and thicknesses.
  *   do_poynting: Whether to precalculate poynting vector components (boolean)
  *
  *  OpenCL args:
- *   E, H, dt, [p{01}h{np}, Psi_{xyz}{np}_H], [oS]
+ *   E, H, dt, [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS]
  */
 
 {{common_header}}
 
 ////////////////////////////////////////////////////////////////////////////
 
-{% if pmls -%}
-const int pml_thickness = {{pml_thickness}};
-{%- endif %}
-
 /*
  *  Precalculate derivatives
  */
@@ -57,7 +53,9 @@ ftype pHxi = 0;
 ftype pHyi = 0;
 ftype pHzi = 0;
 
-{%- for r, p in pmls -%}
+{% for pml in pmls -%}
+    {%- set r = pml['axis'] -%}
+    {%- set p = pml['polarity'] -%}
     {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
     {%- set psi = 'Psi_' ~ r ~ p ~ '_H' -%}
     {%- if r != 'y' -%}
@@ -66,14 +64,16 @@ ftype pHzi = 0;
         {%- set se, sh = '+', '-' -%}
     {%- endif -%}
 
+pml_{{r ~ p}}_thickness = {{pml['thickness']}};
+
     {%- if p == 'n' %}
 
-if ( {{r}} < pml_thickness ) {
+if ( {{r}} < pml_{{r ~ p}}_thickness ) {
     const size_t ir = {{r}};                          // index into pml parameters
 
     {%- elif p == 'p' %}
 
-if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
+if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
     const size_t ir = (s{{r}} - 1) - {{r}};                     // index into pml parameters
 
     {%- endif %}
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 7a49f39..78460ec 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -29,7 +29,8 @@ class Simulation(object):
     After constructing this object, call the (update_E, update_H, update_S) members
      to perform FDTD updates on the stored (E, H, S) fields:
 
-        sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8)
+        pmls = [{'axis': a, 'polarity': p} for a in 'xyz' for p in 'np']
+        sim = Simulation(grid.grids, do_poynting=True, pmls=pmls)
         with open('sources.c', 'w') as f:
             f.write('{}'.format(sim.sources))
 
@@ -73,31 +74,34 @@ class Simulation(object):
 
     def __init__(self,
                  epsilon: List[numpy.ndarray],
+                 pmls: List[Dict[str, int or float]],
                  dt: float = .99/numpy.sqrt(3),
                  initial_E: List[numpy.ndarray] = None,
                  initial_H: List[numpy.ndarray] = None,
                  context: pyopencl.Context = None,
                  queue: pyopencl.CommandQueue = None,
                  float_type: numpy.float32 or numpy.float64 = numpy.float32,
-                 pmls: List[List[str]] = None,
-                 pml_thickness: int = 10,
                  do_poynting: bool = True):
         """
         Initialize the simulation.
 
         :param epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray
                 spanning the simulation domain. Relative epsilon is used.
-        :param dt: Time step. Default is the Courant factor.
+        :param pmls: List of dicts with keys:
+            'axis': One of 'x', 'y', 'z'.
+            'direction': One of 'n', 'p'.
+            'thickness': Number of layers, default 8.
+            'epsilon_eff': Effective epsilon to match to. Default 1.0.
+            'mu_eff': Effective mu to match to. Default 1.0.
+            'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6.
+            'm': Polynomial grading exponent. Default 3.5.
+            'ma': Exponent for alpha. Default 1.
+        :param dt: Time step. Default is .99/sqrt(3).
         :param initial_E: Initial E-field (default is 0 everywhere). Same format as epsilon.
         :param initial_H: Initial H-field (default is 0 everywhere). Same format as epsilon.
         :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
         :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
         :param float_type: numpy.float32 or numpy.float64. Default numpy.float32.
-        :param pmls: List of [axis, direction] pairs which specify simluation boundaries to be
-                'coated' with a PML (absorbing layer). Axis should be one of 'x', 'y', 'z', and
-                direction should be one of 'n', 'p' (i.e., negative, positive).
-                Default is to apply PMLs to all six boundaries.
-        :param pml_thickness: Thickness of any PMLs, in number of grid cells. Default 10.
         :param do_poynting: If true, enables calculation of the poynting vector, S.
                 Poynting vector calculation adds the following computational burdens:
                     * During update_H, ~6 extra additions/cell are performed in order to spatially
@@ -154,8 +158,13 @@ class Simulation(object):
                 Exception('Initial_H list elements must have same shape as epsilon elements')
             self.H = pyopencl.array.to_device(self.queue, vec(H).astype(float_type))
 
-        if pmls is None:
-            pmls = [[d, p] for d in 'xyz' for p in 'np']
+        for pml in pmls:
+            pml.setdefault('thickness', 8)
+            pml.setdefault('epsilon_eff', 1.0)
+            pml.setdefault('mu_eff', 1.0)
+            pml.setdefault('ln_R_per_layer', -1.6)
+            pml.setdefault('m', 3.5)
+            pml.setdefault('ma', 1)
 
         ctype = type_to_C(self.arg_type)
 
@@ -176,7 +185,6 @@ class Simulation(object):
                 )
         jinja_args = {
                 'common_header': common_source,
-                'pml_thickness': pml_thickness,
                 'pmls': pmls,
                 'do_poynting': do_poynting,
                 }
@@ -201,35 +209,39 @@ class Simulation(object):
         '''
         PML
         '''
-        m = (3.5, 1)
-        sigma_max = 0.8 * (m[0] + 1) / numpy.sqrt(1.0) # TODO: epsilon_eff (not 1.0)
-        alpha_max = 0  # TODO: Decide what to do about non-zero alpha
-
-        def par(x):
-            sigma = ((x / pml_thickness) ** m[0]) * sigma_max
-            alpha = ((1 - x / pml_thickness) ** m[1]) * alpha_max
-            p0 = numpy.exp(-(sigma + alpha) * dt)
-            p1 = sigma / (sigma + alpha) * (p0 - 1)
-            return p0, p1
-
-        xen, xep, xhn, xhp = (numpy.arange(1, pml_thickness + 1, dtype=float_type)[::-1] for _ in range(4))
-        xep -= 0.5
-        xhn -= 0.5
-
-        pml_p_names = [['p' + a + eh + np for np in 'np' for a in '01'] for eh in 'eh']
         pml_e_fields = OrderedDict()
         pml_h_fields = OrderedDict()
-        for ne, nh, pe, ph in zip(*pml_p_names, par(xen) + par(xep), par(xhn) + par(xhp)):
-            pml_e_fields[ptr(ne)] = pyopencl.array.to_device(self.queue, pe)
-            pml_h_fields[ptr(nh)] = pyopencl.array.to_device(self.queue, ph)
-
         for pml in pmls:
-            uv = 'xyz'.replace(pml[0], '')
-            psi_base = 'Psi_' + ''.join(pml) + '_'
+            a = 'xyz'.find(pml['axis'])
+
+            sigma_max = -pml['ln_R_per_layer'] / 2 * (pml['m'] + 1) / \
+                    numpy.sqrt(pml['epsilon_eff'] * pml['mu_eff'])
+            alpha_max = 0           # TODO: Nonzero alpha
+
+            def par(x):
+                sigma = ((x / pml['thickness']) ** pml['m']) * sigma_max
+                alpha = ((1 - x / pml['thickness']) ** pml['ma']) * alpha_max
+                p0 = numpy.exp(-(sigma + alpha) * dt)
+                p1 = sigma / (sigma + alpha) * (p0 - 1)
+                return p0, p1
+
+            xe, xh = (numpy.arange(1, pml['thickness'] + 1, dtype=float_type)[::-1] for _ in range(2))
+            if pml['polarity'] == 'p':
+                xe -= 0.5
+            elif pml['polarity'] == 'n':
+                xh -= 0.5
+
+            pml_p_names = [['p' + pml['axis'] + i + eh + pml['polarity'] for i in '01'] for eh in 'eh']
+            for name_e, name_h, pe, ph in zip(pml_p_names[0], pml_p_names[1], par(xe), par(xh)):
+                pml_e_fields[ptr(name_e)] = pyopencl.array.to_device(self.queue, pe)
+                pml_h_fields[ptr(name_h)] = pyopencl.array.to_device(self.queue, ph)
+
+            uv = 'xyz'.replace(pml['axis'], '')
+            psi_base = 'Psi_' + pml['axis'] + pml['polarity'] + '_'
             psi_names = [[psi_base + eh + c for c in uv] for eh in 'EH']
 
             psi_shape = list(epsilon[0].shape)
-            psi_shape['xyz'.find(pml[0])] = pml_thickness
+            psi_shape[a] = pml['thickness']
 
             for ne, nh in zip(*psi_names):
                 pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)

From 6bd756c3d3ab8025ae3f317cc9e8e243987bf7a4 Mon Sep 17 00:00:00 2001
From: jan <jan@mpxd.net>
Date: Sun, 1 Oct 2017 12:37:56 -0700
Subject: [PATCH 10/33] add setup.py

---
 setup.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 setup.py

diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..a2f9f9d
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+
+from setuptools import setup, find_packages
+
+setup(name='opencl_fdtd',
+      version='0.4',
+      description='OpenCL FDTD solver',
+      author='Jan Petykiewicz',
+      author_email='anewusername@gmail.com',
+      url='https://mpxd.net/gogs/jan/opencl_fdtd',
+      packages=find_packages(),
+      package_data={
+          'opencl_fdfd': ['kernels/*']
+      },
+      install_requires=[
+            'numpy',
+            'pyopencl',
+            'jinja2',
+            'fdfd_tools>=0.3',
+      ],
+      extras_require={
+      },
+      )
+

From 9772f47a3482eb7a57872c0219d85753db8822ef Mon Sep 17 00:00:00 2001
From: jan <jan@mpxd.net>
Date: Sun, 1 Oct 2017 14:01:22 -0700
Subject: [PATCH 11/33] fix typo

---
 opencl_fdtd/kernels/update_e.cl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl
index f7304e7..9162b21 100644
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@@ -54,12 +54,12 @@ pml_{{r ~ p}}_thickness = {{pml['thickness']}};
 
     {%- if p == 'n' %}
 
-if ( {{r}} < pml_{{r ~ p}_thickness ) {
+if ( {{r}} < pml_{{r ~ p}}_thickness ) {
     const size_t ir = {{r}};                          // index into pml parameters
 
     {%- elif p == 'p' %}
 
-if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}_thickness ) {
+if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
     const size_t ir = (s{{r}} - 1) - {{r}};                     // index into pml parameters
 
     {%- endif %}

From 6a03977a699a1cf5035e48f10d9b02496b0978f5 Mon Sep 17 00:00:00 2001
From: jan <jan@mpxd.net>
Date: Sun, 1 Oct 2017 13:11:19 -0700
Subject: [PATCH 12/33] fix pml param names

---
 opencl_fdtd/kernels/update_e.cl | 4 ++--
 opencl_fdtd/kernels/update_h.cl | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl
index 9162b21..63a0c6f 100644
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@@ -64,8 +64,8 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
 
     {%- endif %}
     const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
-    {{psi ~ u}}[ip] = p0e{{p}}[ir] * {{psi ~ u}}[ip] + p1e{{p}}[ir] * dH{{v ~ r}};
-    {{psi ~ v}}[ip] = p0e{{p}}[ir] * {{psi ~ v}}[ip] + p1e{{p}}[ir] * dH{{u ~ r}};
+    {{psi ~ u}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1e{{p}}[ir] * dH{{v ~ r}};
+    {{psi ~ v}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1e{{p}}[ir] * dH{{u ~ r}};
     pE{{u}}i {{se}}= {{psi ~ u}}[ip];
     pE{{v}}i {{sh}}= {{psi ~ v}}[ip];
 }
diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index 7c648b6..cdda3a9 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -78,8 +78,8 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
 
     {%- endif %}
     const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
-    {{psi ~ u}}[ip] = p0h{{p}}[ir] * {{psi ~ u}}[ip] + p1h{{p}}[ir] * dE{{v ~ r}};
-    {{psi ~ v}}[ip] = p0h{{p}}[ir] * {{psi ~ v}}[ip] + p1h{{p}}[ir] * dE{{u ~ r}};
+    {{psi ~ u}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1h{{p}}[ir] * dE{{v ~ r}};
+    {{psi ~ v}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1h{{p}}[ir] * dE{{u ~ r}};
     pH{{u}}i {{sh}}= {{psi ~ u}}[ip];
     pH{{v}}i {{se}}= {{psi ~ v}}[ip];
 }

From a67009d1c889208f1d6ec5fe2d91788d24a898b5 Mon Sep 17 00:00:00 2001
From: jan <jan@mpxd.net>
Date: Sun, 1 Oct 2017 13:14:27 -0700
Subject: [PATCH 13/33] fix declaration

---
 opencl_fdtd/kernels/update_e.cl | 2 +-
 opencl_fdtd/kernels/update_h.cl | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl
index 63a0c6f..eed4ee5 100644
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@@ -50,7 +50,7 @@ ftype pEzi = 0;
         {%- set se, sh = '+', '-' -%}
     {%- endif -%}
 
-pml_{{r ~ p}}_thickness = {{pml['thickness']}};
+int pml_{{r ~ p}}_thickness = {{pml['thickness']}};
 
     {%- if p == 'n' %}
 
diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index cdda3a9..27d7a47 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -64,7 +64,7 @@ ftype pHzi = 0;
         {%- set se, sh = '+', '-' -%}
     {%- endif -%}
 
-pml_{{r ~ p}}_thickness = {{pml['thickness']}};
+int pml_{{r ~ p}}_thickness = {{pml['thickness']}};
 
     {%- if p == 'n' %}
 

From aab57412a5b1ab39a4f2426c697612f20f1f3221 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Mon, 15 Jan 2018 22:36:40 -0800
Subject: [PATCH 14/33] move code to new location

---
 README.md        | 8 ++++----
 requirements.txt | 8 ++++----
 setup.py         | 2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 26d5acd..731450b 100644
--- a/README.md
+++ b/README.md
@@ -27,16 +27,16 @@ electromagnetic simulations on parallel compute hardware (mainly GPUs).
 * numpy
 * pyopencl
 * jinja2
-* [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools)
+* [fdfd_tools](https://mpxd.net/code/jan/fdfd_tools)
 
 Optional (used for examples):
 * dill (for file output)
-* [gridlock](https://mpxd.net/gogs/jan/gridlock)
-* [masque](https://mpxd.net/gogs/jan/masque)
+* [gridlock](https://mpxd.net/code/jan/gridlock)
+* [masque](https://mpxd.net/code/jan/masque)
 
 To get the code, just clone this repository:
 ```bash
-git clone https://mpxd.net/gogs/jan/opencl_fdtd.git
+git clone https://mpxd.net/code/jan/opencl_fdtd.git
 ```
 
 You can install the requirements and their dependencies easily with
diff --git a/requirements.txt b/requirements.txt
index 6ed651b..6626171 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@ numpy
 h5py
 pyopencl
 jinja2
--e git+https://mpxd.net/gogs/jan/float_raster.git@release#egg=float_raster
--e git+https://mpxd.net/gogs/jan/gridlock.git@release#egg=gridlock
--e git+https://mpxd.net/gogs/jan/masque.git@release#egg=masque
--e git+https://mpxd.net/gogs/jan/fdfd_tools.git@master#egg=fdfd_tools
+-e git+https://mpxd.net/code/jan/float_raster.git@release#egg=float_raster
+-e git+https://mpxd.net/code/jan/gridlock.git@release#egg=gridlock
+-e git+https://mpxd.net/code/jan/masque.git@release#egg=masque
+-e git+https://mpxd.net/code/jan/fdfd_tools.git@master#egg=fdfd_tools
diff --git a/setup.py b/setup.py
index a2f9f9d..55f5af9 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@ setup(name='opencl_fdtd',
       description='OpenCL FDTD solver',
       author='Jan Petykiewicz',
       author_email='anewusername@gmail.com',
-      url='https://mpxd.net/gogs/jan/opencl_fdtd',
+      url='https://mpxd.net/code/jan/opencl_fdtd',
       packages=find_packages(),
       package_data={
           'opencl_fdfd': ['kernels/*']

From c8298d916fbec02925030d977288ab411d6d49c9 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 16 Sep 2018 20:12:26 -0700
Subject: [PATCH 15/33] Use python3 for setup.py

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 55f5af9..c7e55f3 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 from setuptools import setup, find_packages
 

From d0b303523e735193c021a7b7de64841dbcc47bc3 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 16 Sep 2018 20:12:41 -0700
Subject: [PATCH 16/33] Move version string into module

---
 opencl_fdtd/__init__.py | 4 ++++
 setup.py                | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/opencl_fdtd/__init__.py b/opencl_fdtd/__init__.py
index e3e95b4..b621c19 100644
--- a/opencl_fdtd/__init__.py
+++ b/opencl_fdtd/__init__.py
@@ -1 +1,5 @@
 from .simulation import Simulation, type_to_C
+
+__author__ = 'Jan Petykiewicz'
+
+version = '0.4'
diff --git a/setup.py b/setup.py
index c7e55f3..8b098db 100644
--- a/setup.py
+++ b/setup.py
@@ -1,9 +1,10 @@
 #!/usr/bin/env python3
 
 from setuptools import setup, find_packages
+import opencl_fdtd
 
 setup(name='opencl_fdtd',
-      version='0.4',
+      version=opencl_fdtd.version,
       description='OpenCL FDTD solver',
       author='Jan Petykiewicz',
       author_email='anewusername@gmail.com',

From 1de6fb0e39f39ad2b047116668ab44de0a5429e3 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 16 Sep 2018 20:12:52 -0700
Subject: [PATCH 17/33] Use readme as long_description

---
 setup.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/setup.py b/setup.py
index 8b098db..b7fd3cf 100644
--- a/setup.py
+++ b/setup.py
@@ -3,9 +3,14 @@
 from setuptools import setup, find_packages
 import opencl_fdtd
 
+with open('README.md', 'r') as f:
+    long_description = f.read()
+
 setup(name='opencl_fdtd',
       version=opencl_fdtd.version,
       description='OpenCL FDTD solver',
+      long_description=long_description,
+      long_description_content_type='text/markdown',
       author='Jan Petykiewicz',
       author_email='anewusername@gmail.com',
       url='https://mpxd.net/code/jan/opencl_fdtd',

From ea5e298023e74eb3c1865658cfdaedb1f06d9c58 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Thu, 29 Nov 2018 01:35:20 -0800
Subject: [PATCH 18/33] Explicitly cast to int

---
 opencl_fdtd/kernels/common.cl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/opencl_fdtd/kernels/common.cl b/opencl_fdtd/kernels/common.cl
index d203ca7..deecec9 100644
--- a/opencl_fdtd/kernels/common.cl
+++ b/opencl_fdtd/kernels/common.cl
@@ -73,9 +73,9 @@ __global ftype *Hz = H + ZZ;
  * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix .
  */
 {% for r in 'xyz' %}
-int m{{r}} = -di{{r}};
-int p{{r}} = +di{{r}};
-int wrap_{{r}} = (s{{r}} - 1) * di{{r}};
+int m{{r}} = - (int) di{{r}};
+int p{{r}} = + (int) di{{r}};
+int wrap_{{r}} = (s{{r}} - 1) * (int) di{{r}};
 if ( {{r}} == 0 ) {
   m{{r}} = wrap_{{r}};
 } else if ( {{r}} == s{{r}} - 1 ) {

From 1e874cb0c029cae2beeb4a0fae5d60de53106a83 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Thu, 29 Nov 2018 01:36:13 -0800
Subject: [PATCH 19/33] Fix sign on H component of PML

---
 opencl_fdtd/kernels/update_h.cl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index 27d7a47..968c2b6 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -96,9 +96,9 @@ ftype Hz_old = Hz[i];
 {%- endif %}
 
 // H update equations
-Hx[i] -= dt * (dEzy - dEyz + pHxi);
-Hy[i] -= dt * (dExz - dEzx + pHyi);
-Hz[i] -= dt * (dEyx - dExy + pHzi);
+Hx[i] -= dt * (dEzy - dEyz - pHxi);
+Hy[i] -= dt * (dExz - dEzx - pHyi);
+Hz[i] -= dt * (dEyx - dExy - pHzi);
 
 {% if do_poynting -%}
 // Average H across timesteps

From 2b1d906665c6394561b613ba62af832bbfb72ea3 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Thu, 29 Nov 2018 01:38:28 -0800
Subject: [PATCH 20/33] Add _create_field() and _create_eps()

---
 opencl_fdtd/simulation.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 78460ec..525d818 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -273,7 +273,24 @@ class Simulation(object):
                                          arguments=', '.join(S_args.keys()))
 
             self.update_S = lambda e: S_update(*S_args.values(), wait_for=e)
+    def _create_eps(self, epsilon: List[numpy.ndarray]):
+        if len(epsilon) != 3:
+            raise Exception('Epsilon must be a list with length of 3')
+        if not all((e.shape == epsilon[0].shape for e in epsilon[1:])):
+            raise Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
+        if not epsilon[0].shape == self.shape:
+            raise Exception('Epsilon shape mismatch. Expected {}, got {}'.format(self.shape, epsilon[0].shape))
+        self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(self.arg_type))
 
+    def _create_field(self, initial_value: List[numpy.ndarray] = None):
+        if initial_value is None:
+            return pyopencl.array.zeros_like(self.eps)
+        else:
+            if len(initial_value) != 3:
+                Exception('Initial field value must be a list of length 3')
+            if not all((f.shape == self.shape for f in initial_value)):
+                Exception('Initial field list elements must have same shape as epsilon elements')
+            return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type))
 
 def type_to_C(float_type) -> str:
     """

From f00c8b4a3eb921fc8ed118f8b7f240178a5f3c47 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Thu, 29 Nov 2018 01:59:05 -0800
Subject: [PATCH 21/33] Add _create_context(), _create_operation(), and
 _create_pmls(), and generalize initial field value args

---
 opencl_fdtd/simulation.py | 116 ++++++++++++++++----------------------
 1 file changed, 50 insertions(+), 66 deletions(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 525d818..7b94c64 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -76,8 +76,7 @@ class Simulation(object):
                  epsilon: List[numpy.ndarray],
                  pmls: List[Dict[str, int or float]],
                  dt: float = .99/numpy.sqrt(3),
-                 initial_E: List[numpy.ndarray] = None,
-                 initial_H: List[numpy.ndarray] = None,
+                 initial_fields: Dict[str, List[numpy.ndarray]] = None,
                  context: pyopencl.Context = None,
                  queue: pyopencl.CommandQueue = None,
                  float_type: numpy.float32 or numpy.float64 = numpy.float32,
@@ -113,21 +112,14 @@ class Simulation(object):
                     * GPU memory requirements are approximately doubled, since S and the intermediate
                         products must be stored.
         """
+        if initial_fields is None:
+            initial_fields = {}
 
-        if len(epsilon) != 3:
-            Exception('Epsilon must be a list with length of 3')
-        if not all((e.shape == epsilon[0].shape for e in epsilon[1:])):
-            Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
-
-        if context is None:
-            self.context = pyopencl.create_some_context()
-        else:
-            self.context = context
-
-        if queue is None:
-            self.queue = pyopencl.CommandQueue(self.context)
-        else:
-            self.queue = queue
+        self.shape = epsilon[0].shape
+        self.arg_type = float_type
+        self.sources = {}
+        self._create_context(context, queue)
+        self._create_eps(epsilon)
 
         if dt > .99/numpy.sqrt(3):
             warnings.warn('Warning: unstable dt: {}'.format(dt))
@@ -136,27 +128,8 @@ class Simulation(object):
         else:
             self.dt = dt
 
-        self.arg_type = float_type
-        self.sources = {}
-        self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(float_type))
-
-        if initial_E is None:
-            self.E = pyopencl.array.zeros_like(self.eps)
-        else:
-            if len(initial_E) != 3:
-                Exception('Initial_E must be a list of length 3')
-            if not all((E.shape == epsilon[0].shape for E in initial_E)):
-                Exception('Initial_E list elements must have same shape as epsilon elements')
-            self.E = pyopencl.array.to_device(self.queue, vec(E).astype(float_type))
-
-        if initial_H is None:
-            self.H = pyopencl.array.zeros_like(self.eps)
-        else:
-            if len(initial_H) != 3:
-                Exception('Initial_H must be a list of length 3')
-            if not all((H.shape == epsilon[0].shape for H in initial_H)):
-                Exception('Initial_H list elements must have same shape as epsilon elements')
-            self.H = pyopencl.array.to_device(self.queue, vec(H).astype(float_type))
+        self.E = self._create_field(initial_fields.get('E', None))
+        self.H = self._create_field(initial_fields.get('H', None))
 
         for pml in pmls:
             pml.setdefault('thickness', 8)
@@ -181,7 +154,7 @@ class Simulation(object):
 
         common_source = jinja_env.get_template('common.cl').render(
                 ftype=ctype,
-                shape=epsilon[0].shape,
+                shape=self.shape,
                 )
         jinja_args = {
                 'common_header': common_source,
@@ -194,21 +167,37 @@ class Simulation(object):
         self.sources['E'] = E_source
         self.sources['H'] = H_source
 
+
+
+        S_fields = OrderedDict()
         if do_poynting:
             S_source = jinja_env.get_template('update_s.cl').render(**jinja_args)
             self.sources['S'] = S_source
 
-            self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=float_type)
+            self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=self.arg_type)
             self.S = pyopencl.array.zeros_like(self.E)
-            S_fields = OrderedDict()
             S_fields[ptr('oS')] = self.oS
             S_fields[ptr('S')] = self.S
-        else:
-            S_fields = OrderedDict()
 
         '''
         PML
         '''
+        pml_e_fields, pml_h_fields = self._create_pmls(pmls)
+
+        '''
+        Create operations
+        '''
+        self.update_E = self._create_operation(E_source, (base_fields, eps_field, pml_e_fields))
+        self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields))
+        if do_poynting:
+            self.update_S = self._create_operation(S_source, (base_fields, S_fields))
+
+
+    def _create_pmls(self, pmls):
+        ctype = type_to_C(self.arg_type)
+        def ptr(arg: str) -> str:
+            return ctype + ' *' + arg
+
         pml_e_fields = OrderedDict()
         pml_h_fields = OrderedDict()
         for pml in pmls:
@@ -225,7 +214,7 @@ class Simulation(object):
                 p1 = sigma / (sigma + alpha) * (p0 - 1)
                 return p0, p1
 
-            xe, xh = (numpy.arange(1, pml['thickness'] + 1, dtype=float_type)[::-1] for _ in range(2))
+            xe, xh = (numpy.arange(1, pml['thickness'] + 1, dtype=self.arg_type)[::-1] for _ in range(2))
             if pml['polarity'] == 'p':
                 xe -= 0.5
             elif pml['polarity'] == 'n':
@@ -240,39 +229,34 @@ class Simulation(object):
             psi_base = 'Psi_' + pml['axis'] + pml['polarity'] + '_'
             psi_names = [[psi_base + eh + c for c in uv] for eh in 'EH']
 
-            psi_shape = list(epsilon[0].shape)
+            psi_shape = list(self.shape)
             psi_shape[a] = pml['thickness']
 
             for ne, nh in zip(*psi_names):
                 pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
                 pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
+        return pml_e_fields, pml_h_fields
 
-        self.pml_e_fields = pml_e_fields
-        self.pml_h_fields = pml_h_fields
+    def _create_operation(self, source, args_fields):
+        args = OrderedDict()
+        [args.update(d) for d in args_fields]
+        update = ElementwiseKernel(self.context, operation=source,
+                                   arguments=', '.join(args.keys()))
+        return lambda e: update(*args.values(), wait_for=e)
 
 
-        '''
-        Create operations
-        '''
-        E_args = OrderedDict()
-        [E_args.update(d) for d in (base_fields, eps_field, pml_e_fields)]
-        E_update = ElementwiseKernel(self.context, operation=E_source,
-                                     arguments=', '.join(E_args.keys()))
+    def _create_context(self, context: pyopencl.Context = None,
+                        queue: pyopencl.CommandQueue = None):
+        if context is None:
+            self.context = pyopencl.create_some_context()
+        else:
+            self.context = context
 
-        H_args = OrderedDict()
-        [H_args.update(d) for d in (base_fields, pml_h_fields, S_fields)]
-        H_update = ElementwiseKernel(self.context, operation=H_source,
-                                     arguments=', '.join(H_args.keys()))
-        self.update_E = lambda e: E_update(*E_args.values(), wait_for=e)
-        self.update_H = lambda e: H_update(*H_args.values(), wait_for=e)
+        if queue is None:
+            self.queue = pyopencl.CommandQueue(self.context)
+        else:
+            self.queue = queue
 
-        if do_poynting:
-            S_args = OrderedDict()
-            [S_args.update(d) for d in (base_fields, S_fields)]
-            S_update = ElementwiseKernel(self.context, operation=S_source,
-                                         arguments=', '.join(S_args.keys()))
-
-            self.update_S = lambda e: S_update(*S_args.values(), wait_for=e)
     def _create_eps(self, epsilon: List[numpy.ndarray]):
         if len(epsilon) != 3:
             raise Exception('Epsilon must be a list with length of 3')

From cb471df1827ac726e3a36d72a9868177897b90ba Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Thu, 29 Nov 2018 02:00:30 -0800
Subject: [PATCH 22/33] Implement proper kappa for PML

---
 opencl_fdtd/kernels/update_e.cl |  2 ++
 opencl_fdtd/kernels/update_h.cl |  4 ++++
 opencl_fdtd/simulation.py       | 19 +++++++++++--------
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl
index eed4ee5..5a9ed1a 100644
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@@ -64,6 +64,8 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
 
     {%- endif %}
     const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
+    dH{{v ~ r}} *= p{{r}}2e{{p}}[ir];
+    dH{{u ~ r}} *= p{{r}}2e{{p}}[ir];
     {{psi ~ u}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1e{{p}}[ir] * dH{{v ~ r}};
     {{psi ~ v}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1e{{p}}[ir] * dH{{u ~ r}};
     pE{{u}}i {{se}}= {{psi ~ u}}[ip];
diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index 968c2b6..56d9bbc 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -45,6 +45,8 @@ ftype aEzy = Ez[i + py] + Ez[i];
 {%- endif %}
 
 
+
+
 /*
  *  PML Update
  */
@@ -78,6 +80,8 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
 
     {%- endif %}
     const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
+    dE{{v ~ r}} *= p{{r}}2h{{p}}[ir];
+    dE{{u ~ r}} *= p{{r}}2h{{p}}[ir];
     {{psi ~ u}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1h{{p}}[ir] * dE{{v ~ r}};
     {{psi ~ v}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1h{{p}}[ir] * dE{{u ~ r}};
     pH{{u}}i {{sh}}= {{psi ~ u}}[ip];
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 7b94c64..1b6d9a2 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -203,16 +203,19 @@ class Simulation(object):
         for pml in pmls:
             a = 'xyz'.find(pml['axis'])
 
-            sigma_max = -pml['ln_R_per_layer'] / 2 * (pml['m'] + 1) / \
-                    numpy.sqrt(pml['epsilon_eff'] * pml['mu_eff'])
-            alpha_max = 0           # TODO: Nonzero alpha
+            sigma_max = -pml['ln_R_per_layer'] / 2 * (pml['m'] + 1)
+            kappa_max = numpy.sqrt(pml['mu_eff'] * pml['epsilon_eff'])
+            alpha_max = 0           # TODO: Nonzero alpha?
 
             def par(x):
-                sigma = ((x / pml['thickness']) ** pml['m']) * sigma_max
+                scaling = ((x / (pml['thickness'])) ** pml['m'])
+                sigma = scaling * sigma_max
+                kappa = 1 + scaling * (kappa_max - 1)
                 alpha = ((1 - x / pml['thickness']) ** pml['ma']) * alpha_max
-                p0 = numpy.exp(-(sigma + alpha) * dt)
-                p1 = sigma / (sigma + alpha) * (p0 - 1)
-                return p0, p1
+                p0 = numpy.exp(-(sigma / kappa + alpha) * self.dt)
+                p1 = sigma / (sigma + kappa * alpha) * (p0 - 1)
+                p2 = 1/kappa
+                return p0, p1, p2
 
             xe, xh = (numpy.arange(1, pml['thickness'] + 1, dtype=self.arg_type)[::-1] for _ in range(2))
             if pml['polarity'] == 'p':
@@ -220,7 +223,7 @@ class Simulation(object):
             elif pml['polarity'] == 'n':
                 xh -= 0.5
 
-            pml_p_names = [['p' + pml['axis'] + i + eh + pml['polarity'] for i in '01'] for eh in 'eh']
+            pml_p_names = [['p' + pml['axis'] + i + eh + pml['polarity'] for i in '012'] for eh in 'eh']
             for name_e, name_h, pe, ph in zip(pml_p_names[0], pml_p_names[1], par(xe), par(xh)):
                 pml_e_fields[ptr(name_e)] = pyopencl.array.to_device(self.queue, pe)
                 pml_h_fields[ptr(name_h)] = pyopencl.array.to_device(self.queue, ph)

From d8dc024626f872d2a9310745c96d02413b1e1982 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Thu, 29 Nov 2018 02:01:28 -0800
Subject: [PATCH 23/33] Implementation of "source" fields (J)

---
 opencl_fdtd/kernels/update_j.cl | 32 ++++++++++++++++++++++++++++++++
 opencl_fdtd/simulation.py       | 24 +++++++++++++++++++++++-
 2 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100644 opencl_fdtd/kernels/update_j.cl

diff --git a/opencl_fdtd/kernels/update_j.cl b/opencl_fdtd/kernels/update_j.cl
new file mode 100644
index 0000000..4d7f64d
--- /dev/null
+++ b/opencl_fdtd/kernels/update_j.cl
@@ -0,0 +1,32 @@
+/*
+ *  Update E-field from J field 
+ *
+ *  Template parameters:
+ *   common_header: Rendered contents of common.cl
+ *
+ *  OpenCL args:
+ *   E, Jr, Ji, c, s, xmin, xmax, ymin, ymax, zmin, zmax   
+ */
+
+{{common_header}}
+
+////////////////////////////////////////////////////////////////////////////
+
+__global ftype *Jrx = Jr + XX;
+__global ftype *Jry = Jr + YY;
+__global ftype *Jrz = Jr + ZZ;
+__global ftype *Jix = Ji + XX;
+__global ftype *Jiy = Ji + YY;
+__global ftype *Jiz = Ji + ZZ;
+
+
+if (x < xmin || y < ymin || z < zmin) {
+   PYOPENCL_ELWISE_CONTINUE;
+}
+if (x >= xmax || y >= ymax || z >= zmax) {
+   PYOPENCL_ELWISE_CONTINUE;
+}
+
+Ex[i] += c * Jrx[i] + s * Jix[i];
+Ey[i] += c * Jry[i] + s * Jiy[i];
+Ez[i] += c * Jrz[i] + s * Jiz[i];
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 1b6d9a2..6e12b2e 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -70,6 +70,7 @@ class Simulation(object):
     update_E = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
     update_H = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
     update_S = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
+    update_J = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
     sources = None      # type: Dict[str, str]
 
     def __init__(self,
@@ -80,7 +81,8 @@ class Simulation(object):
                  context: pyopencl.Context = None,
                  queue: pyopencl.CommandQueue = None,
                  float_type: numpy.float32 or numpy.float64 = numpy.float32,
-                 do_poynting: bool = True):
+                 do_poynting: bool = True,
+                 do_fieldsrc: bool = False):
         """
         Initialize the simulation.
 
@@ -179,6 +181,17 @@ class Simulation(object):
             S_fields[ptr('oS')] = self.oS
             S_fields[ptr('S')] = self.S
 
+        J_fields = OrderedDict()
+        if do_fieldsrc:
+            J_source = jinja_env.get_template('update_j.cl').render(**jinja_args)
+            self.sources['J'] = J_source
+
+            self.Ji = pyopencl.array.zeros_like(self.E)
+            self.Jr = pyopencl.array.zeros_like(self.E)
+            J_fields[ptr('Jr')] = self.Jr
+            J_fields[ptr('Ji')] = self.Ji
+
+
         '''
         PML
         '''
@@ -191,6 +204,15 @@ class Simulation(object):
         self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields))
         if do_poynting:
             self.update_S = self._create_operation(S_source, (base_fields, S_fields))
+        if do_fieldsrc:
+            args = OrderedDict()
+            [args.update(d) for d in (base_fields, J_fields)]
+            var_args = [ctype + ' ' + v for v in 'cs'] + ['uint ' + r + m for r in 'xyz' for m in ('min', 'max')]
+            print(var_args)
+            update = ElementwiseKernel(self.context, operation=J_source,
+                                       arguments=', '.join(list(args.keys()) + var_args))
+            #print(len(args.values()),'\n\n',  args.values(), args.keys())
+            self.update_J = lambda e, *a: update(*args.values(), *a, wait_for=e)
 
 
     def _create_pmls(self, pmls):

From 3ac20f62718f52d3ea710fb4fb24d135eb80faf7 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Thu, 29 Nov 2018 02:01:48 -0800
Subject: [PATCH 24/33] Add bloch boundaries (untested)

---
 opencl_fdtd/kernels/update_e.cl | 12 ++++++++++++
 opencl_fdtd/kernels/update_h.cl | 15 +++++++++++++++
 opencl_fdtd/simulation.py       | 33 ++++++++++++++++++++++++++++++++-
 3 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl
index 5a9ed1a..c578414 100644
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@@ -31,6 +31,18 @@ ftype dHyz = Hy[i] - Hy[i + mz];
 ftype dHzx = Hz[i] - Hz[i + mx];
 ftype dHzy = Hz[i] - Hz[i + my];
 
+{% for bloch in bloch_boundaries -%}
+    {%- set r = bloch['axis'] -%}
+    {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
+if ({{r}} == 0) {
+    ftype bloch_im = {{bloch['real']}};
+    ftype bloch_re = {{bloch['imag']}};
+    dH{{u ~ r}} = bloch_re * dH{{v ~ r}} + bloch_im * (G{{u}}[i] - G{{u}}[i + m{{u}}]);
+    dH{{v ~ r}} = bloch_re * dH{{v ~ r}} + bloch_im * (G{{v}}[i] - G{{v}}[i + m{{v}}]);
+}
+{%- endfor %}
+
+
 /*
  *   PML Update
  */
diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index 56d9bbc..0ef15dd 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -28,6 +28,21 @@ ftype dEyz = Ey[i + pz] - Ey[i];
 ftype dEzx = Ez[i + px] - Ez[i];
 ftype dEzy = Ez[i + py] - Ez[i];
 
+
+{% for bloch in bloch_boundaries -%}
+    {%- set r = bloch['axis'] -%}
+    {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
+if ({{r}} == s{{r}} - 1) {
+    ftype bloch_re = {{bloch['real']}};
+    ftype bloch_im = {{bloch['imag']}};
+    dE{{u ~ r}} = bloch_re * dE{{u ~ r}} + bloch_im * (F{{u}}[i + p{{u}}] - F{{u}}[i]);
+    dE{{v ~ r}} = bloch_re * dE{{v ~ r}} + bloch_im * (F{{v}}[i + p{{v}}] - F{{v}}[i]);
+}
+{%- endfor %}
+
+
+
+
 {%- if do_poynting %}
 
 
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 6e12b2e..9b23cad 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -76,6 +76,7 @@ class Simulation(object):
     def __init__(self,
                  epsilon: List[numpy.ndarray],
                  pmls: List[Dict[str, int or float]],
+                 bloch_boundaries: List[Dict[str, int or float]] = (),
                  dt: float = .99/numpy.sqrt(3),
                  initial_fields: Dict[str, List[numpy.ndarray]] = None,
                  context: pyopencl.Context = None,
@@ -132,6 +133,9 @@ class Simulation(object):
 
         self.E = self._create_field(initial_fields.get('E', None))
         self.H = self._create_field(initial_fields.get('H', None))
+        if bloch_boundaries:
+            self.F = self._create_field(initial_fields.get('F', None))
+            self.G = self._create_field(initial_fields.get('G', None))
 
         for pml in pmls:
             pml.setdefault('thickness', 8)
@@ -154,6 +158,17 @@ class Simulation(object):
         eps_field = OrderedDict()
         eps_field[ptr('eps')] = self.eps
 
+        if bloch_boundaries:
+            base_fields[ptr('F')] = self.F
+            base_fields[ptr('G')] = self.G
+
+            bloch_fields = OrderedDict()
+            bloch_fields[ptr('E')] = self.F
+            bloch_fields[ptr('H')] = self.G
+            bloch_fields[ctype + ' dt'] = self.dt
+            bloch_fields[ptr('F')] = self.E
+            bloch_fields[ptr('G')] = self.H
+
         common_source = jinja_env.get_template('common.cl').render(
                 ftype=ctype,
                 shape=self.shape,
@@ -162,13 +177,24 @@ class Simulation(object):
                 'common_header': common_source,
                 'pmls': pmls,
                 'do_poynting': do_poynting,
+                'bloch': bloch_boundaries,
                 }
         E_source = jinja_env.get_template('update_e.cl').render(**jinja_args)
         H_source = jinja_env.get_template('update_h.cl').render(**jinja_args)
-
         self.sources['E'] = E_source
         self.sources['H'] = H_source
 
+        if bloch_boundaries:
+            bloch_args = jinja_args.copy()
+            bloch_args['do_poynting'] = False
+            bloch_args['bloch'] = [{'axis': b['axis'],
+                                    'real': b['imag'],
+                                    'imag': b['real']}
+                                   for b in bloch_boundaries]
+            F_source = jinja_env.get_template('update_e.cl').render(**bloch_args)
+            G_source = jinja_env.get_template('update_h.cl').render(**bloch_args)
+            self.sources['F'] = F_source
+            self.sources['G'] = G_source
 
 
         S_fields = OrderedDict()
@@ -196,6 +222,8 @@ class Simulation(object):
         PML
         '''
         pml_e_fields, pml_h_fields = self._create_pmls(pmls)
+        if bloch_boundaries:
+            pml_f_fields, pml_g_fields = self._create_pmls(pmls)
 
         '''
         Create operations
@@ -204,6 +232,9 @@ class Simulation(object):
         self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields))
         if do_poynting:
             self.update_S = self._create_operation(S_source, (base_fields, S_fields))
+        if bloch_boundaries:
+            self.update_F = self._create_operation(F_source, (bloch_fields, eps_field, pml_f_fields))
+            self.update_G = self._create_operation(G_source, (bloch_fields, pml_g_fields))
         if do_fieldsrc:
             args = OrderedDict()
             [args.update(d) for d in (base_fields, J_fields)]

From 5ffe547640c78af55a7a8e802f8a503cee21e950 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Fri, 30 Nov 2018 00:13:27 -0800
Subject: [PATCH 25/33] Typo fixes and minor comment updates

---
 opencl_fdtd/kernels/update_e.cl |  4 ++--
 opencl_fdtd/kernels/update_h.cl |  2 +-
 opencl_fdtd/simulation.py       | 10 +++++-----
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl
index c578414..f44527d 100644
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@@ -4,7 +4,7 @@
  *  Template parameters:
  *   common_header: Rendered contents of common.cl
  *   pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
-        axes, polarities, and thicknesses.
+ *      axes, polarities, and thicknesses.
  *
  *  OpenCL args:
  *   E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E]
@@ -20,7 +20,7 @@ __global ftype *epsz = eps + ZZ;
 
 
 /*
- *   Precalclate derivatives
+ *   Precalculate derivatives
  */
 ftype dHxy = Hx[i] - Hx[i + my];
 ftype dHxz = Hx[i] - Hx[i + mz];
diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index 0ef15dd..f1c19cb 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -5,7 +5,7 @@
  *  Template parameters:
  *   common_header: Rendered contents of common.cl
  *   pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
-        axes, polarities, and thicknesses.
+ *      axes, polarities, and thicknesses.
  *   do_poynting: Whether to precalculate poynting vector components (boolean)
  *
  *  OpenCL args:
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 9b23cad..0eaef5d 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -56,11 +56,11 @@ class Simulation(object):
      event0 and event1 to occur (i.e. previous operations to finish) before starting execution.
      event2 can then be used to prepare further operations to be run after update_H.
     """
-    E = None    # type: List[pyopencl.array.Array]
-    H = None    # type: List[pyopencl.array.Array]
-    S = None    # type: List[pyopencl.array.Array]
-    eps = None  # type: List[pyopencl.array.Array]
-    dt = None   # type: float
+    E = None            # type: pyopencl.array.Array
+    H = None            # type: pyopencl.array.Array
+    S = None            # type: pyopencl.array.Array
+    eps = None          # type: pyopencl.array.Array
+    dt = None           # type: float
 
     arg_type = None     # type: numpy.float32 or numpy.float64
 

From f5a5a0ad04176e8ebbca15c8e4a53300e5b2f5e6 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Fri, 30 Nov 2018 00:36:11 -0800
Subject: [PATCH 26/33] Enable nonuniform grids (minimally tested)

---
 opencl_fdtd/kernels/update_e.cl | 28 +++++++++++++++++++++-------
 opencl_fdtd/kernels/update_h.cl | 28 +++++++++++++++++++++-------
 opencl_fdtd/simulation.py       | 28 +++++++++++++++++++++++++---
 3 files changed, 67 insertions(+), 17 deletions(-)

diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl
index f44527d..9127181 100644
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@@ -5,9 +5,12 @@
  *   common_header: Rendered contents of common.cl
  *   pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
  *      axes, polarities, and thicknesses.
+ *   uniform_dx: If grid is uniform, uniform_dx should be the grid spacing.
+ *      Otherwise, uniform_dx should be False and [inv_dh{xyz}] arrays must be supplied as
+ *      OpenCL args.
  *
  *  OpenCL args:
- *   E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E]
+ *   E, H, dt, eps, [p{012}e{np}, Psi_{xyz}{np}_E], [inv_dh{xyz}]
  */
 
 {{common_header}}
@@ -19,17 +22,28 @@ __global ftype *epsy = eps + YY;
 __global ftype *epsz = eps + ZZ;
 
 
+{%- if uniform_dx %}
+ftype inv_dx = 1.0 / {{uniform_dx}};
+ftype inv_dy = 1.0 / {{uniform_dx}};
+ftype inv_dz = 1.0 / {{uniform_dx}};
+{%- else %}
+ftype inv_dx = inv_dhx[x];
+ftype inv_dy = inv_dhy[y];
+ftype inv_dz = inv_dhz[z];
+{%- endif %}
+
+
 /*
  *   Precalculate derivatives
  */
-ftype dHxy = Hx[i] - Hx[i + my];
-ftype dHxz = Hx[i] - Hx[i + mz];
+ftype dHxy = (Hx[i] - Hx[i + my]) * inv_dy;
+ftype dHxz = (Hx[i] - Hx[i + mz]) * inv_dz;
 
-ftype dHyx = Hy[i] - Hy[i + mx];
-ftype dHyz = Hy[i] - Hy[i + mz];
+ftype dHyx = (Hy[i] - Hy[i + mx]) * inv_dx;
+ftype dHyz = (Hy[i] - Hy[i + mz]) * inv_dz;
 
-ftype dHzx = Hz[i] - Hz[i + mx];
-ftype dHzy = Hz[i] - Hz[i + my];
+ftype dHzx = (Hz[i] - Hz[i + mx]) * inv_dx;
+ftype dHzy = (Hz[i] - Hz[i + my]) * inv_dy;
 
 {% for bloch in bloch_boundaries -%}
     {%- set r = bloch['axis'] -%}
diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index f1c19cb..ac75157 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -7,9 +7,12 @@
  *   pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
  *      axes, polarities, and thicknesses.
  *   do_poynting: Whether to precalculate poynting vector components (boolean)
+ *   uniform_dx: If grid is uniform, uniform_dx should be the grid spacing.
+ *      Otherwise, uniform_dx should be False and [inv_de{xyz}] arrays must be supplied as
+ *      OpenCL args.
  *
  *  OpenCL args:
- *   E, H, dt, [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS]
+ *   E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS]
  */
 
 {{common_header}}
@@ -19,14 +22,25 @@
 /*
  *  Precalculate derivatives
  */
-ftype dExy = Ex[i + py] - Ex[i];
-ftype dExz = Ex[i + pz] - Ex[i];
+{%- if uniform_dx %}
+ftype inv_dx = 1.0 / {{uniform_dx}};
+ftype inv_dy = 1.0 / {{uniform_dx}};
+ftype inv_dz = 1.0 / {{uniform_dx}};
+{%- else %}
+ftype inv_dx = inv_dex[x];
+ftype inv_dy = inv_dey[y];
+ftype inv_dz = inv_dez[z];
+{%- endif %}
 
-ftype dEyx = Ey[i + px] - Ey[i];
-ftype dEyz = Ey[i + pz] - Ey[i];
 
-ftype dEzx = Ez[i + px] - Ez[i];
-ftype dEzy = Ez[i + py] - Ez[i];
+ftype dExy = (Ex[i + py] - Ex[i]) * inv_dy;
+ftype dExz = (Ex[i + pz] - Ex[i]) * inv_dz;
+
+ftype dEyx = (Ey[i + px] - Ey[i]) * inv_dx;
+ftype dEyz = (Ey[i + pz] - Ey[i]) * inv_dz;
+
+ftype dEzx = (Ez[i + px] - Ez[i]) * inv_dx;
+ftype dEzy = (Ez[i + py] - Ez[i]) * inv_dy;
 
 
 {% for bloch in bloch_boundaries -%}
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 0eaef5d..940725a 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -61,6 +61,7 @@ class Simulation(object):
     S = None            # type: pyopencl.array.Array
     eps = None          # type: pyopencl.array.Array
     dt = None           # type: float
+    inv_dxes = None     # type: List[pyopencl.array.Array]
 
     arg_type = None     # type: numpy.float32 or numpy.float64
 
@@ -77,7 +78,8 @@ class Simulation(object):
                  epsilon: List[numpy.ndarray],
                  pmls: List[Dict[str, int or float]],
                  bloch_boundaries: List[Dict[str, int or float]] = (),
-                 dt: float = .99/numpy.sqrt(3),
+                 dxes: List[List[numpy.ndarray]] or float = None,
+                 dt: float = None,
                  initial_fields: Dict[str, List[numpy.ndarray]] = None,
                  context: pyopencl.Context = None,
                  queue: pyopencl.CommandQueue = None,
@@ -98,7 +100,7 @@ class Simulation(object):
             'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6.
             'm': Polynomial grading exponent. Default 3.5.
             'ma': Exponent for alpha. Default 1.
-        :param dt: Time step. Default is .99/sqrt(3).
+        :param dt: Time step. Default is min(dxes) * .99/sqrt(3).
         :param initial_E: Initial E-field (default is 0 everywhere). Same format as epsilon.
         :param initial_H: Initial H-field (default is 0 everywhere). Same format as epsilon.
         :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
@@ -124,7 +126,22 @@ class Simulation(object):
         self._create_context(context, queue)
         self._create_eps(epsilon)
 
-        if dt > .99/numpy.sqrt(3):
+        if dxes is None:
+            dxes = 1.0
+
+        if isinstance(dxes, (float, int)):
+            uniform_dx = dxes
+            min_dx = dxes
+        else:
+            uniform_dx = False
+            self.inv_dxes = [self._create_field(1 / dxn) for dxn in dxes[0] + dxes[1]]
+            min_dx = min(min(dxn) for dxn in dxes[0] + dxes[1])
+
+        max_dt = min_dx * .99 / numpy.sqrt(3)
+
+        if dt is None:
+            self.dt = max_dt
+        elif dt > max_dt:
             warnings.warn('Warning: unstable dt: {}'.format(dt))
         elif dt <= 0:
             raise Exception('Invalid dt: {}'.format(dt))
@@ -154,6 +171,10 @@ class Simulation(object):
         base_fields[ptr('E')] = self.E
         base_fields[ptr('H')] = self.H
         base_fields[ctype + ' dt'] = self.dt
+        if uniform_dx == False:
+            inv_dx_names = ['inv_d' + eh + r for eh in 'eh' for r in 'xyz']
+            for name, field in zip(inv_dx_names, self.inv_dxes):
+                base_fields[ptr(name)] = field
 
         eps_field = OrderedDict()
         eps_field[ptr('eps')] = self.eps
@@ -178,6 +199,7 @@ class Simulation(object):
                 'pmls': pmls,
                 'do_poynting': do_poynting,
                 'bloch': bloch_boundaries,
+                'uniform_dx': uniform_dx,
                 }
         E_source = jinja_env.get_template('update_e.cl').render(**jinja_args)
         H_source = jinja_env.get_template('update_h.cl').render(**jinja_args)

From 65cc1183921b6107b8487fc762a1606bb6512d8a Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Fri, 30 Nov 2018 00:36:22 -0800
Subject: [PATCH 27/33] Remove debug code

---
 opencl_fdtd/simulation.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 940725a..087e9c4 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -261,10 +261,8 @@ class Simulation(object):
             args = OrderedDict()
             [args.update(d) for d in (base_fields, J_fields)]
             var_args = [ctype + ' ' + v for v in 'cs'] + ['uint ' + r + m for r in 'xyz' for m in ('min', 'max')]
-            print(var_args)
             update = ElementwiseKernel(self.context, operation=J_source,
                                        arguments=', '.join(list(args.keys()) + var_args))
-            #print(len(args.values()),'\n\n',  args.values(), args.keys())
             self.update_J = lambda e, *a: update(*args.values(), *a, wait_for=e)
 
 

From 9cf50b1d88f2745cbf8f8215e23ec0610de67b84 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Fri, 30 Nov 2018 00:36:32 -0800
Subject: [PATCH 28/33] Enable nonzero pml alpha

---
 opencl_fdtd/simulation.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 087e9c4..d24dcf2 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -160,6 +160,7 @@ class Simulation(object):
             pml.setdefault('mu_eff', 1.0)
             pml.setdefault('ln_R_per_layer', -1.6)
             pml.setdefault('m', 3.5)
+            pml.setdefault('cfs_alpha', 0)
             pml.setdefault('ma', 1)
 
         ctype = type_to_C(self.arg_type)
@@ -278,7 +279,7 @@ class Simulation(object):
 
             sigma_max = -pml['ln_R_per_layer'] / 2 * (pml['m'] + 1)
             kappa_max = numpy.sqrt(pml['mu_eff'] * pml['epsilon_eff'])
-            alpha_max = 0           # TODO: Nonzero alpha?
+            alpha_max = pml['cfs_alpha']
 
             def par(x):
                 scaling = ((x / (pml['thickness'])) ** pml['m'])

From 60b70bb332d29fb5a2aecb801517be659cf4daf9 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Fri, 30 Nov 2018 00:37:07 -0800
Subject: [PATCH 29/33] Add restrict keyword to pointers (not sharing the same
 memory for multiple fields)

---
 opencl_fdtd/simulation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index d24dcf2..1e573c9 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -166,7 +166,7 @@ class Simulation(object):
         ctype = type_to_C(self.arg_type)
 
         def ptr(arg: str) -> str:
-            return ctype + ' *' + arg
+            return ctype + ' * restrict ' + arg
 
         base_fields = OrderedDict()
         base_fields[ptr('E')] = self.E

From a76e741d325d26ff5968904ae12f4ed95effd313 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Fri, 30 Nov 2018 00:37:16 -0800
Subject: [PATCH 30/33] Minor spacing changes

---
 opencl_fdtd/simulation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 1e573c9..988e45b 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -321,7 +321,6 @@ class Simulation(object):
                                    arguments=', '.join(args.keys()))
         return lambda e: update(*args.values(), wait_for=e)
 
-
     def _create_context(self, context: pyopencl.Context = None,
                         queue: pyopencl.CommandQueue = None):
         if context is None:
@@ -353,6 +352,7 @@ class Simulation(object):
                 Exception('Initial field list elements must have same shape as epsilon elements')
             return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type))
 
+
 def type_to_C(float_type) -> str:
     """
     Returns a string corresponding to the C equivalent of a numpy type.

From 0f0f16e1846489ac1ab4c4126167c5b0b3fbb0fe Mon Sep 17 00:00:00 2001
From: jan <jan@mpxd.net>
Date: Fri, 30 Nov 2018 01:03:10 -0800
Subject: [PATCH 31/33] Minor formatting changes

---
 opencl_fdtd/kernels/update_h.cl | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index ac75157..187a18a 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -52,10 +52,7 @@ if ({{r}} == s{{r}} - 1) {
     dE{{u ~ r}} = bloch_re * dE{{u ~ r}} + bloch_im * (F{{u}}[i + p{{u}}] - F{{u}}[i]);
     dE{{v ~ r}} = bloch_re * dE{{v ~ r}} + bloch_im * (F{{v}}[i + p{{v}}] - F{{v}}[i]);
 }
-{%- endfor %}
-
-
-
+{% endfor -%}
 
 {%- if do_poynting %}
 
@@ -93,7 +90,7 @@ ftype pHzi = 0;
         {%- set se, sh = '-', '+' -%}
     {%- else -%}
         {%- set se, sh = '+', '-' -%}
-    {%- endif -%}
+    {%- endif %}
 
 int pml_{{r ~ p}}_thickness = {{pml['thickness']}};
 

From 385e2859a9620cefe3536bd2eea9f5a051c2e9b2 Mon Sep 17 00:00:00 2001
From: jan <jan@mpxd.net>
Date: Fri, 30 Nov 2018 01:03:20 -0800
Subject: [PATCH 32/33] Update parameter descriptions

---
 opencl_fdtd/simulation.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 988e45b..9dd90c0 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -100,9 +100,13 @@ class Simulation(object):
             'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6.
             'm': Polynomial grading exponent. Default 3.5.
             'ma': Exponent for alpha. Default 1.
+        :param bloch_boundaries: List of dicts with keys:
+            'axis': One of 'x', 'y', 'z'.
+            'real': Real part of bloch phase factor (i.e. real(exp(i * phase)))
+            'imag': Imaginary part of bloch phase factor (i.e. imag(exp(i * phase)))
         :param dt: Time step. Default is min(dxes) * .99/sqrt(3).
-        :param initial_E: Initial E-field (default is 0 everywhere). Same format as epsilon.
-        :param initial_H: Initial H-field (default is 0 everywhere). Same format as epsilon.
+        :param initial_fields: Dict with optional keys ('E', 'H', 'F', 'G') containing initial values for the
+            specified fields (default is 0 everywhere). Fields have same format as epsilon.
         :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
         :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
         :param float_type: numpy.float32 or numpy.float64. Default numpy.float32.

From 582dafbc2fefce0bcc38b524a715e99f9c5ac89b Mon Sep 17 00:00:00 2001
From: jan <jan@mpxd.net>
Date: Fri, 30 Nov 2018 01:04:00 -0800
Subject: [PATCH 33/33] Update example with bloch fields

---
 fdtd.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/fdtd.py b/fdtd.py
index 08606c6..99af07b 100644
--- a/fdtd.py
+++ b/fdtd.py
@@ -128,7 +128,9 @@ def main():
     # #### Create the simulation grid ####
     pmls = [{'axis': a, 'polarity': p, 'thickness': pml_thickness}
             for a in 'xyz' for p in 'np']
-    sim = Simulation(grid.grids, do_poynting=True, pmls=pmls)
+    #bloch = [{'axis': a, 'real': 1, 'imag': 0} for a in 'x']
+    bloch = []
+    sim = Simulation(grid.grids, do_poynting=True, pmls=pmls, bloch_boundaries=bloch)
 
     # Source parameters and function
     w = 2 * numpy.pi * dx / wl
@@ -142,22 +144,32 @@ def main():
 
     with open('sources.c', 'w') as f:
         f.write(sim.sources['E'])
-        f.write('\n==========================================\n')
+        f.write('\n====================H======================\n')
         f.write(sim.sources['H'])
         if sim.update_S:
-            f.write('\n==========================================\n')
+            f.write('\n=====================S=====================\n')
             f.write(sim.sources['S'])
+        if bloch:
+            f.write('\n=====================F=====================\n')
+            f.write(sim.sources['F'])
+            f.write('\n=====================G=====================\n')
+            f.write(sim.sources['G'])
 
     # #### Run a bunch of iterations ####
     # event = sim.whatever([prev_event]) indicates that sim.whatever should be queued
     #  immediately and run once prev_event is finished.
     start = time.perf_counter()
     for t in range(max_t):
-        sim.update_E([]).wait()
+        e = sim.update_E([])
+        if bloch:
+            e = sim.update_F([e])
+        e.wait()
 
         ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + numpy.prod(grid.shape)
         sim.E[ind] += field_source(t)
         e = sim.update_H([])
+        if bloch:
+            e = sim.update_G([e])
         if sim.update_S:
             e = sim.update_S([e])
         e.wait()