From b372130c6b91030a7cc56842a07de569b792bec4 Mon Sep 17 00:00:00 2001
From: jan <jan@orange.mpxd.net>
Date: Sun, 16 Apr 2017 02:51:09 -0700
Subject: [PATCH 01/33] try out opencl image3d_t... it runs in the base case,
 no idea if correctly

---
 fdtd.py                |  4 +--
 fdtd/kernels/common.cl | 36 +++-----------------------
 fdtd/simulation.py     | 59 ++++++++++++++++++++++++++----------------
 3 files changed, 42 insertions(+), 57 deletions(-)

diff --git a/fdtd.py b/fdtd.py
index bbdabec..d41a038 100644
--- a/fdtd.py
+++ b/fdtd.py
@@ -122,7 +122,7 @@ def main():
 
     print('grid shape: {}'.format(grid.shape))
     # #### Create the simulation grid ####
-    sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8)
+    sim = Simulation(grid.grids, do_poynting=False, pmls=[])
 
     # Source parameters and function
     w = 2 * numpy.pi * dx / wl
@@ -150,7 +150,7 @@ def main():
         sim.update_E([]).wait()
 
         ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + numpy.prod(grid.shape)
-        sim.E[ind] += field_source(t)
+#        sim.buf[ind] += field_source(t)
         e = sim.update_H([])
         if sim.update_S:
             e = sim.update_S([e])
diff --git a/fdtd/kernels/common.cl b/fdtd/kernels/common.cl
index d203ca7..e43336b 100644
--- a/fdtd/kernels/common.cl
+++ b/fdtd/kernels/common.cl
@@ -2,14 +2,10 @@
 /* Common code for E, H updates
  *
  * Template parameters:
- *  ftype	type name (e.g. float or double)
  *  shape       list of 3 ints specifying shape of fields
  */
 #}
 
-typedef {{ftype}} ftype;
-
-
 /*
  * Field size info
  */
@@ -18,13 +14,6 @@ const size_t sy = {{shape[1]}};
 const size_t sz = {{shape[2]}};
 const size_t field_size = sx * sy * sz;
 
-//Since we use i to index into Ex[], Ey[], ... rather than E[], do nothing if
-// i is outside the bounds of Ex[].
-if (i >= field_size) {
-    PYOPENCL_ELWISE_CONTINUE;
-}
-
-
 /*
  * Array indexing
  */
@@ -42,25 +31,6 @@ const size_t diy = sz;
 const size_t diz = 1;
 
 
-/*
- * Pointer math
- */
-//Pointer offsets into the components of a linearized vector-field
-// (eg. Hx = H + XX, where H and Hx are pointers)
-const size_t XX = 0;
-const size_t YY = field_size;
-const size_t ZZ = field_size * 2;
-
-//Define pointers to vector components of each field (eg. Hx = H + XX)
-__global ftype *Ex = E + XX;
-__global ftype *Ey = E + YY;
-__global ftype *Ez = E + ZZ;
-
-__global ftype *Hx = H + XX;
-__global ftype *Hy = H + YY;
-__global ftype *Hz = H + ZZ;
-
-
 /*
  * Implement periodic boundary conditions
  *
@@ -73,9 +43,9 @@ __global ftype *Hz = H + ZZ;
  * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix .
  */
 {% for r in 'xyz' %}
-int m{{r}} = -di{{r}};
-int p{{r}} = +di{{r}};
-int wrap_{{r}} = (s{{r}} - 1) * di{{r}};
+int m{{r}} = -1;
+int p{{r}} = +1;
+int wrap_{{r}} = s{{r}} - 1;
 if ( {{r}} == 0 ) {
   m{{r}} = wrap_{{r}};
 } else if ( {{r}} == s{{r}} - 1 ) {
diff --git a/fdtd/simulation.py b/fdtd/simulation.py
index f1a8dec..9ba9b88 100644
--- a/fdtd/simulation.py
+++ b/fdtd/simulation.py
@@ -17,6 +17,7 @@ from fdfd_tools import vec
 
 __author__ = 'Jan Petykiewicz'
 
+float4 = pyopencl.array.vec.float4
 
 # Create jinja2 env on module load
 jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
@@ -88,12 +89,23 @@ class Simulation(object):
         else:
             self.dt = dt
 
+        def make4d(f):
+            g = [numpy.transpose(fi)[:,:,:,None] for fi in f]
+            h = g + [numpy.empty_like(g[0])]
+            j = numpy.concatenate(h, axis=3)
+            return numpy.ascontiguousarray(j, dtype=numpy.float32) 
+
         self.arg_type = float_type
         self.sources = {}
-        self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(float_type))
+        ef = make4d(epsilon).astype(float_type)
+        self.eps = pyopencl.image_from_array(self.context,
+                make4d(epsilon), num_channels=4, mode='r')
+
+        self.buf = pyopencl.array.Array(self.queue, shape=epsilon.shape, dtype=float4)
 
         if initial_E is None:
-            self.E = pyopencl.array.zeros_like(self.eps)
+            self.E = pyopencl.image_from_array(self.context,
+                make4d(epsilon) * 0, num_channels=4, mode='r')
         else:
             if len(initial_E) != 3:
                 Exception('Initial_E must be a list of length 3')
@@ -102,7 +114,8 @@ class Simulation(object):
             self.E = pyopencl.array.to_device(self.queue, vec(E).astype(float_type))
 
         if initial_H is None:
-            self.H = pyopencl.array.zeros_like(self.eps)
+            self.H = pyopencl.image_from_array(self.context,
+                make4d(epsilon) * 0, num_channels=4, mode='r')
         else:
             if len(initial_H) != 3:
                 Exception('Initial_H must be a list of length 3')
@@ -119,15 +132,8 @@ class Simulation(object):
             return ctype + ' *' + arg
 
         base_fields = OrderedDict()
-        base_fields[ptr('E')] = self.E
-        base_fields[ptr('H')] = self.H
-        base_fields[ctype + ' dt'] = self.dt
-
-        eps_field = OrderedDict()
-        eps_field[ptr('eps')] = self.eps
 
         common_source = jinja_env.get_template('common.cl').render(
-                ftype=ctype,
                 shape=epsilon[0].shape,
                 )
         jinja_args = {
@@ -136,8 +142,8 @@ class Simulation(object):
                 'pmls': pmls,
                 'do_poynting': do_poynting,
                 }
-        E_source = jinja_env.get_template('update_e.cl').render(**jinja_args)
-        H_source = jinja_env.get_template('update_h.cl').render(**jinja_args)
+        E_source = jinja_env.get_template('update_e_full.cl').render(**jinja_args)
+        H_source = jinja_env.get_template('update_h_full.cl').render(**jinja_args)
 
         self.sources['E'] = E_source
         self.sources['H'] = H_source
@@ -198,17 +204,26 @@ class Simulation(object):
         '''
         Create operations
         '''
-        E_args = OrderedDict()
-        [E_args.update(d) for d in (base_fields, eps_field, pml_e_fields)]
-        E_update = ElementwiseKernel(self.context, operation=E_source,
-                                     arguments=', '.join(E_args.keys()))
+        E_update = pyopencl.Program(self.context, E_source).build().update_e
+        H_update = pyopencl.Program(self.context, H_source).build().update_h
+        max_gs = E_update.get_work_group_info(pyopencl.kernel_work_group_info.WORK_GROUP_SIZE,
+                                      self.queue.device)
+        gs, ls = self.buf.get_sizes(self.queue, max_gs)
+        print('gs', gs, ls, max_gs)
+        def update_E(e):
+            e = pyopencl.enqueue_copy(self.queue, self.buf.data, self.E, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=e)
+            e = E_update(self.queue, gs, ls, self.buf.data, self.H, numpy.float32(dt), self.eps, numpy.uint32(self.buf.size), wait_for=[e])
+            return e
 
-        H_args = OrderedDict()
-        [H_args.update(d) for d in (base_fields, pml_h_fields, S_fields)]
-        H_update = ElementwiseKernel(self.context, operation=H_source,
-                                     arguments=', '.join(H_args.keys()))
-        self.update_E = lambda e: E_update(*E_args.values(), wait_for=e)
-        self.update_H = lambda e: H_update(*H_args.values(), wait_for=e)
+        def update_H(e):
+            e = pyopencl.enqueue_copy(self.queue, self.E, self.buf.data, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=e)
+            e = pyopencl.enqueue_copy(self.queue, self.buf.data, self.H, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=[e])
+            e = H_update(self.queue, gs, ls, self.E, self.buf.data, numpy.float32(dt), numpy.uint32(self.buf.size), wait_for=[e])
+            e = pyopencl.enqueue_copy(self.queue, self.H, self.buf.data, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=[e])
+            return e
+
+        self.update_E = update_E
+        self.update_H = update_H
 
         if do_poynting:
             S_args = OrderedDict()

From 97d9901e4b0356d4a8ab77d65013e7f5b73bc009 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sat, 12 Aug 2017 19:20:29 -0700
Subject: [PATCH 02/33] Use logging package for output

---
 fdtd.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fdtd.py b/fdtd.py
index bbdabec..961bc0e 100644
--- a/fdtd.py
+++ b/fdtd.py
@@ -6,6 +6,7 @@ See main() for simulation setup.
 
 import sys
 import time
+import logging
 
 import numpy
 import lzma 
@@ -20,6 +21,9 @@ import fdfd_tools
 
 __author__ = 'Jan Petykiewicz'
 
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger(__name__)
+
 
 def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern:
     """
@@ -120,7 +124,7 @@ def main():
                        eps=n_air**2,
                        polygons=mask.as_polygons())
 
-    print('grid shape: {}'.format(grid.shape))
+    logger.info('grid shape: {}'.format(grid.shape))
     # #### Create the simulation grid ####
     sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8)
 
@@ -157,7 +161,7 @@ def main():
         e.wait()
 
         if t % 100 == 0:
-            print('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start)))
+            logger.info('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start)))
             sys.stdout.flush()
 
     with lzma.open('saved_simulation', 'wb') as f:

From 0d91f0d43efba28e91dc1a88b5f3acab9294b799 Mon Sep 17 00:00:00 2001
From: jan <jan@mpxd.net>
Date: Mon, 14 Aug 2017 15:41:20 -0700
Subject: [PATCH 03/33] rename lib

---
 fdtd.py                                   | 7 ++++---
 fdtd/__init__.py                          | 0
 opencl_fdtd/__init__.py                   | 1 +
 {fdtd => opencl_fdtd}/kernels/common.cl   | 0
 {fdtd => opencl_fdtd}/kernels/update_e.cl | 0
 {fdtd => opencl_fdtd}/kernels/update_h.cl | 0
 {fdtd => opencl_fdtd}/kernels/update_s.cl | 0
 {fdtd => opencl_fdtd}/simulation.py       | 6 +++---
 8 files changed, 8 insertions(+), 6 deletions(-)
 delete mode 100644 fdtd/__init__.py
 create mode 100644 opencl_fdtd/__init__.py
 rename {fdtd => opencl_fdtd}/kernels/common.cl (100%)
 rename {fdtd => opencl_fdtd}/kernels/update_e.cl (100%)
 rename {fdtd => opencl_fdtd}/kernels/update_h.cl (100%)
 rename {fdtd => opencl_fdtd}/kernels/update_s.cl (100%)
 rename {fdtd => opencl_fdtd}/simulation.py (99%)

diff --git a/fdtd.py b/fdtd.py
index bbdabec..13af0fd 100644
--- a/fdtd.py
+++ b/fdtd.py
@@ -8,10 +8,10 @@ import sys
 import time
 
 import numpy
-import lzma 
+import lzma
 import dill
 
-from fdtd.simulation import Simulation
+from opencl_fdtd import Simulation
 from masque import Pattern, shapes
 import gridlock
 import pcgen
@@ -133,7 +133,7 @@ def main():
     def field_source(i):
         t0 = i * sim.dt - delay
         return numpy.sin(w * t0) * numpy.exp(-alpha * t0**2)
-   
+
     with open('sources.c', 'w') as f:
         f.write(sim.sources['E'])
         f.write('\n==========================================\n')
@@ -172,5 +172,6 @@ def main():
             d['S'] = unvec(sim.S.get())
         dill.dump(d, f)
 
+
 if __name__ == '__main__':
     main()
diff --git a/fdtd/__init__.py b/fdtd/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/opencl_fdtd/__init__.py b/opencl_fdtd/__init__.py
new file mode 100644
index 0000000..e3e95b4
--- /dev/null
+++ b/opencl_fdtd/__init__.py
@@ -0,0 +1 @@
+from .simulation import Simulation, type_to_C
diff --git a/fdtd/kernels/common.cl b/opencl_fdtd/kernels/common.cl
similarity index 100%
rename from fdtd/kernels/common.cl
rename to opencl_fdtd/kernels/common.cl
diff --git a/fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl
similarity index 100%
rename from fdtd/kernels/update_e.cl
rename to opencl_fdtd/kernels/update_e.cl
diff --git a/fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
similarity index 100%
rename from fdtd/kernels/update_h.cl
rename to opencl_fdtd/kernels/update_h.cl
diff --git a/fdtd/kernels/update_s.cl b/opencl_fdtd/kernels/update_s.cl
similarity index 100%
rename from fdtd/kernels/update_s.cl
rename to opencl_fdtd/kernels/update_s.cl
diff --git a/fdtd/simulation.py b/opencl_fdtd/simulation.py
similarity index 99%
rename from fdtd/simulation.py
rename to opencl_fdtd/simulation.py
index f1a8dec..77a95df 100644
--- a/fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -152,7 +152,7 @@ class Simulation(object):
             S_fields[ptr('oS')] = self.oS
             S_fields[ptr('S')] = self.S
         else:
-            S_fields = OrderedDict() 
+            S_fields = OrderedDict()
 
         '''
         PML
@@ -167,7 +167,7 @@ class Simulation(object):
             p0 = numpy.exp(-(sigma + alpha) * dt)
             p1 = sigma / (sigma + alpha) * (p0 - 1)
             return p0, p1
-        
+
         xen, xep, xhn, xhp = (numpy.arange(1, pml_thickness + 1, dtype=float_type)[::-1] for _ in range(4))
         xep -= 0.5
         xhn -= 0.5
@@ -190,7 +190,7 @@ class Simulation(object):
             for ne, nh in zip(*psi_names):
                 pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
                 pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
-        
+
         self.pml_e_fields = pml_e_fields
         self.pml_h_fields = pml_h_fields
 

From a85f5477490f8f175dc42e2e61a67026b4c9063a Mon Sep 17 00:00:00 2001
From: jan <jan@mpxd.net>
Date: Thu, 24 Aug 2017 11:28:03 -0700
Subject: [PATCH 04/33] doc updates

---
 README.md                       |  4 ++-
 opencl_fdtd/kernels/update_e.cl |  4 +--
 opencl_fdtd/kernels/update_h.cl |  4 +--
 opencl_fdtd/kernels/update_s.cl |  8 ++---
 opencl_fdtd/simulation.py       | 52 ++++++++++++++++++++++++++++++---
 5 files changed, 59 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index fbc972b..26d5acd 100644
--- a/README.md
+++ b/README.md
@@ -27,10 +27,12 @@ electromagnetic simulations on parallel compute hardware (mainly GPUs).
 * numpy
 * pyopencl
 * jinja2
+* [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools)
+
+Optional (used for examples):
 * dill (for file output)
 * [gridlock](https://mpxd.net/gogs/jan/gridlock)
 * [masque](https://mpxd.net/gogs/jan/masque)
-* [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools)
 
 To get the code, just clone this repository:
 ```bash
diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl
index 2728b46..4bda993 100644
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@@ -1,11 +1,11 @@
 /*
  *  Update E-field, including any PMLs.
- *  
+ *
  *  Template parameters:
  *   common_header: Rendered contents of common.cl
  *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
  *   pml_thickness: Number of cells (integer)
- *   
+ *
  *  OpenCL args:
  *   E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E]
  */
diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index e093cb9..8519b51 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -1,12 +1,12 @@
 /*
  *  Update H-field, including any PMLs.
  *   Also precalculate values for poynting vector if necessary.
- *  
+ *
  *  Template parameters:
  *   common_header: Rendered contents of common.cl
  *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
  *   pml_thickness: Number of cells (integer)
- *   do_poynting: Whether to precalculate poynting vector components (boolean)  
+ *   do_poynting: Whether to precalculate poynting vector components (boolean)
  *
  *  OpenCL args:
  *   E, H, dt, [p{01}h{np}, Psi_{xyz}{np}_H], [oS]
diff --git a/opencl_fdtd/kernels/update_s.cl b/opencl_fdtd/kernels/update_s.cl
index 0310411..94e061e 100644
--- a/opencl_fdtd/kernels/update_s.cl
+++ b/opencl_fdtd/kernels/update_s.cl
@@ -1,11 +1,11 @@
 /*
  *  Update E-field, including any PMLs.
- *  
+ *
  *  Template parameters:
  *   common_header: Rendered contents of common.cl
  *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
  *   pml_thickness: Number of cells (integer)
- *   
+ *
  *  OpenCL args:
  *   E, H, dt, S, oS
  */
@@ -17,12 +17,12 @@
 
 /*
  * Calculate S from oS (pre-calculated components)
- */ 
+ */
 __global ftype *Sx = S + XX;
 __global ftype *Sy = S + YY;
 __global ftype *Sz = S + ZZ;
 
-// Use unscaled S components from H locations 
+// Use unscaled S components from H locations
 __global ftype *oSxy = oS + 0 * field_size;
 __global ftype *oSyz = oS + 1 * field_size;
 __global ftype *oSzx = oS + 2 * field_size;
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 77a95df..7a49f39 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -25,6 +25,35 @@ jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
 class Simulation(object):
     """
     Constructs and holds the basic FDTD operations and related fields
+
+    After constructing this object, call the (update_E, update_H, update_S) members
+     to perform FDTD updates on the stored (E, H, S) fields:
+
+        sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8)
+        with open('sources.c', 'w') as f:
+            f.write('{}'.format(sim.sources))
+
+        for t in range(max_t):
+            sim.update_E([]).wait()
+
+            # Find the linear index for the center point, for Ey
+            ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + \
+                    numpy.prod(grid.shape) * 1
+            # Perturb the field (i.e., add a soft current source)
+            sim.E[ind] += numpy.sin(omega * t * sim.dt)
+            event = sim.update_H([])
+            if sim.update_S:
+                event = sim.update_S([event])
+            event.wait()
+
+            with lzma.open('saved_simulation', 'wb') as f:
+                dill.dump(fdfd_tools.unvec(sim.E.get(), grid.shape), f)
+
+    Code in the form
+        event2 = sim.update_H([event0, event1])
+     indicates that the update_H operation should be prepared immediately, but wait for
+     event0 and event1 to occur (i.e. previous operations to finish) before starting execution.
+     event2 can then be used to prepare further operations to be run after update_H.
     """
     E = None    # type: List[pyopencl.array.Array]
     H = None    # type: List[pyopencl.array.Array]
@@ -37,9 +66,9 @@ class Simulation(object):
     context = None      # type: pyopencl.Context
     queue = None        # type: pyopencl.CommandQueue
 
-    update_E = None     # type: Callable[[],pyopencl.Event]
-    update_H = None     # type: Callable[[],pyopencl.Event]
-    update_S = None     # type: Callable[[],pyopencl.Event]
+    update_E = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
+    update_H = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
+    update_S = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
     sources = None      # type: Dict[str, str]
 
     def __init__(self,
@@ -50,8 +79,8 @@ class Simulation(object):
                  context: pyopencl.Context = None,
                  queue: pyopencl.CommandQueue = None,
                  float_type: numpy.float32 or numpy.float64 = numpy.float32,
-                 pml_thickness: int = 10,
                  pmls: List[List[str]] = None,
+                 pml_thickness: int = 10,
                  do_poynting: bool = True):
         """
         Initialize the simulation.
@@ -64,6 +93,21 @@ class Simulation(object):
         :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
         :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
         :param float_type: numpy.float32 or numpy.float64. Default numpy.float32.
+        :param pmls: List of [axis, direction] pairs which specify simluation boundaries to be
+                'coated' with a PML (absorbing layer). Axis should be one of 'x', 'y', 'z', and
+                direction should be one of 'n', 'p' (i.e., negative, positive).
+                Default is to apply PMLs to all six boundaries.
+        :param pml_thickness: Thickness of any PMLs, in number of grid cells. Default 10.
+        :param do_poynting: If true, enables calculation of the poynting vector, S.
+                Poynting vector calculation adds the following computational burdens:
+                    * During update_H, ~6 extra additions/cell are performed in order to spatially
+                        average E and temporally average H. These quantities are multiplied
+                        (6 multiplications/cell) and then stored (6 writes/cell, cache-friendly).
+                    * update_S performs a discrete cross product using the precalculated products
+                        from update_H. This is not nice to the cache and similar to e.g. update_E
+                        in complexity.
+                    * GPU memory requirements are approximately doubled, since S and the intermediate
+                        products must be stored.
         """
 
         if len(epsilon) != 3:

From d02cd18403e8c07b8c3de7a10b335fec180a148a Mon Sep 17 00:00:00 2001
From: jan <jan@mpxd.net>
Date: Sun, 1 Oct 2017 12:34:11 -0700
Subject: [PATCH 05/33] improve pml specification

---
 fdtd.py                         |  4 +-
 opencl_fdtd/kernels/update_e.cl | 17 +++----
 opencl_fdtd/kernels/update_h.cl | 20 ++++----
 opencl_fdtd/simulation.py       | 82 +++++++++++++++++++--------------
 4 files changed, 69 insertions(+), 54 deletions(-)

diff --git a/fdtd.py b/fdtd.py
index 5d0f4c3..08606c6 100644
--- a/fdtd.py
+++ b/fdtd.py
@@ -126,7 +126,9 @@ def main():
 
     logger.info('grid shape: {}'.format(grid.shape))
     # #### Create the simulation grid ####
-    sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8)
+    pmls = [{'axis': a, 'polarity': p, 'thickness': pml_thickness}
+            for a in 'xyz' for p in 'np']
+    sim = Simulation(grid.grids, do_poynting=True, pmls=pmls)
 
     # Source parameters and function
     w = 2 * numpy.pi * dx / wl
diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl
index 4bda993..f7304e7 100644
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@@ -3,8 +3,8 @@
  *
  *  Template parameters:
  *   common_header: Rendered contents of common.cl
- *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
- *   pml_thickness: Number of cells (integer)
+ *   pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
+        axes, polarities, and thicknesses.
  *
  *  OpenCL args:
  *   E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E]
@@ -18,9 +18,6 @@ __global ftype *epsx = eps + XX;
 __global ftype *epsy = eps + YY;
 __global ftype *epsz = eps + ZZ;
 
-{% if pmls -%}
-const int pml_thickness = {{pml_thickness}};
-{%- endif %}
 
 /*
  *   Precalclate derivatives
@@ -42,7 +39,9 @@ ftype pExi = 0;
 ftype pEyi = 0;
 ftype pEzi = 0;
 
-{% for r, p in pmls -%}
+{% for pml in pmls -%}
+    {%- set r = pml['axis'] -%}
+    {%- set p = pml['polarity'] -%}
     {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
     {%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%}
     {%- if r != 'y' -%}
@@ -51,14 +50,16 @@ ftype pEzi = 0;
         {%- set se, sh = '+', '-' -%}
     {%- endif -%}
 
+pml_{{r ~ p}}_thickness = {{pml['thickness']}};
+
     {%- if p == 'n' %}
 
-if ( {{r}} < pml_thickness ) {
+if ( {{r}} < pml_{{r ~ p}_thickness ) {
     const size_t ir = {{r}};                          // index into pml parameters
 
     {%- elif p == 'p' %}
 
-if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
+if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}_thickness ) {
     const size_t ir = (s{{r}} - 1) - {{r}};                     // index into pml parameters
 
     {%- endif %}
diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index 8519b51..7c648b6 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -4,22 +4,18 @@
  *
  *  Template parameters:
  *   common_header: Rendered contents of common.cl
- *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
- *   pml_thickness: Number of cells (integer)
+ *   pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
+        axes, polarities, and thicknesses.
  *   do_poynting: Whether to precalculate poynting vector components (boolean)
  *
  *  OpenCL args:
- *   E, H, dt, [p{01}h{np}, Psi_{xyz}{np}_H], [oS]
+ *   E, H, dt, [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS]
  */
 
 {{common_header}}
 
 ////////////////////////////////////////////////////////////////////////////
 
-{% if pmls -%}
-const int pml_thickness = {{pml_thickness}};
-{%- endif %}
-
 /*
  *  Precalculate derivatives
  */
@@ -57,7 +53,9 @@ ftype pHxi = 0;
 ftype pHyi = 0;
 ftype pHzi = 0;
 
-{%- for r, p in pmls -%}
+{% for pml in pmls -%}
+    {%- set r = pml['axis'] -%}
+    {%- set p = pml['polarity'] -%}
     {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
     {%- set psi = 'Psi_' ~ r ~ p ~ '_H' -%}
     {%- if r != 'y' -%}
@@ -66,14 +64,16 @@ ftype pHzi = 0;
         {%- set se, sh = '+', '-' -%}
     {%- endif -%}
 
+pml_{{r ~ p}}_thickness = {{pml['thickness']}};
+
     {%- if p == 'n' %}
 
-if ( {{r}} < pml_thickness ) {
+if ( {{r}} < pml_{{r ~ p}}_thickness ) {
     const size_t ir = {{r}};                          // index into pml parameters
 
     {%- elif p == 'p' %}
 
-if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
+if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
     const size_t ir = (s{{r}} - 1) - {{r}};                     // index into pml parameters
 
     {%- endif %}
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 7a49f39..78460ec 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -29,7 +29,8 @@ class Simulation(object):
     After constructing this object, call the (update_E, update_H, update_S) members
      to perform FDTD updates on the stored (E, H, S) fields:
 
-        sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8)
+        pmls = [{'axis': a, 'polarity': p} for a in 'xyz' for p in 'np']
+        sim = Simulation(grid.grids, do_poynting=True, pmls=pmls)
         with open('sources.c', 'w') as f:
             f.write('{}'.format(sim.sources))
 
@@ -73,31 +74,34 @@ class Simulation(object):
 
     def __init__(self,
                  epsilon: List[numpy.ndarray],
+                 pmls: List[Dict[str, int or float]],
                  dt: float = .99/numpy.sqrt(3),
                  initial_E: List[numpy.ndarray] = None,
                  initial_H: List[numpy.ndarray] = None,
                  context: pyopencl.Context = None,
                  queue: pyopencl.CommandQueue = None,
                  float_type: numpy.float32 or numpy.float64 = numpy.float32,
-                 pmls: List[List[str]] = None,
-                 pml_thickness: int = 10,
                  do_poynting: bool = True):
         """
         Initialize the simulation.
 
         :param epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray
                 spanning the simulation domain. Relative epsilon is used.
-        :param dt: Time step. Default is the Courant factor.
+        :param pmls: List of dicts with keys:
+            'axis': One of 'x', 'y', 'z'.
+            'direction': One of 'n', 'p'.
+            'thickness': Number of layers, default 8.
+            'epsilon_eff': Effective epsilon to match to. Default 1.0.
+            'mu_eff': Effective mu to match to. Default 1.0.
+            'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6.
+            'm': Polynomial grading exponent. Default 3.5.
+            'ma': Exponent for alpha. Default 1.
+        :param dt: Time step. Default is .99/sqrt(3).
         :param initial_E: Initial E-field (default is 0 everywhere). Same format as epsilon.
         :param initial_H: Initial H-field (default is 0 everywhere). Same format as epsilon.
         :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
         :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
         :param float_type: numpy.float32 or numpy.float64. Default numpy.float32.
-        :param pmls: List of [axis, direction] pairs which specify simluation boundaries to be
-                'coated' with a PML (absorbing layer). Axis should be one of 'x', 'y', 'z', and
-                direction should be one of 'n', 'p' (i.e., negative, positive).
-                Default is to apply PMLs to all six boundaries.
-        :param pml_thickness: Thickness of any PMLs, in number of grid cells. Default 10.
         :param do_poynting: If true, enables calculation of the poynting vector, S.
                 Poynting vector calculation adds the following computational burdens:
                     * During update_H, ~6 extra additions/cell are performed in order to spatially
@@ -154,8 +158,13 @@ class Simulation(object):
                 Exception('Initial_H list elements must have same shape as epsilon elements')
             self.H = pyopencl.array.to_device(self.queue, vec(H).astype(float_type))
 
-        if pmls is None:
-            pmls = [[d, p] for d in 'xyz' for p in 'np']
+        for pml in pmls:
+            pml.setdefault('thickness', 8)
+            pml.setdefault('epsilon_eff', 1.0)
+            pml.setdefault('mu_eff', 1.0)
+            pml.setdefault('ln_R_per_layer', -1.6)
+            pml.setdefault('m', 3.5)
+            pml.setdefault('ma', 1)
 
         ctype = type_to_C(self.arg_type)
 
@@ -176,7 +185,6 @@ class Simulation(object):
                 )
         jinja_args = {
                 'common_header': common_source,
-                'pml_thickness': pml_thickness,
                 'pmls': pmls,
                 'do_poynting': do_poynting,
                 }
@@ -201,35 +209,39 @@ class Simulation(object):
         '''
         PML
         '''
-        m = (3.5, 1)
-        sigma_max = 0.8 * (m[0] + 1) / numpy.sqrt(1.0) # TODO: epsilon_eff (not 1.0)
-        alpha_max = 0  # TODO: Decide what to do about non-zero alpha
-
-        def par(x):
-            sigma = ((x / pml_thickness) ** m[0]) * sigma_max
-            alpha = ((1 - x / pml_thickness) ** m[1]) * alpha_max
-            p0 = numpy.exp(-(sigma + alpha) * dt)
-            p1 = sigma / (sigma + alpha) * (p0 - 1)
-            return p0, p1
-
-        xen, xep, xhn, xhp = (numpy.arange(1, pml_thickness + 1, dtype=float_type)[::-1] for _ in range(4))
-        xep -= 0.5
-        xhn -= 0.5
-
-        pml_p_names = [['p' + a + eh + np for np in 'np' for a in '01'] for eh in 'eh']
         pml_e_fields = OrderedDict()
         pml_h_fields = OrderedDict()
-        for ne, nh, pe, ph in zip(*pml_p_names, par(xen) + par(xep), par(xhn) + par(xhp)):
-            pml_e_fields[ptr(ne)] = pyopencl.array.to_device(self.queue, pe)
-            pml_h_fields[ptr(nh)] = pyopencl.array.to_device(self.queue, ph)
-
         for pml in pmls:
-            uv = 'xyz'.replace(pml[0], '')
-            psi_base = 'Psi_' + ''.join(pml) + '_'
+            a = 'xyz'.find(pml['axis'])
+
+            sigma_max = -pml['ln_R_per_layer'] / 2 * (pml['m'] + 1) / \
+                    numpy.sqrt(pml['epsilon_eff'] * pml['mu_eff'])
+            alpha_max = 0           # TODO: Nonzero alpha
+
+            def par(x):
+                sigma = ((x / pml['thickness']) ** pml['m']) * sigma_max
+                alpha = ((1 - x / pml['thickness']) ** pml['ma']) * alpha_max
+                p0 = numpy.exp(-(sigma + alpha) * dt)
+                p1 = sigma / (sigma + alpha) * (p0 - 1)
+                return p0, p1
+
+            xe, xh = (numpy.arange(1, pml['thickness'] + 1, dtype=float_type)[::-1] for _ in range(2))
+            if pml['polarity'] == 'p':
+                xe -= 0.5
+            elif pml['polarity'] == 'n':
+                xh -= 0.5
+
+            pml_p_names = [['p' + pml['axis'] + i + eh + pml['polarity'] for i in '01'] for eh in 'eh']
+            for name_e, name_h, pe, ph in zip(pml_p_names[0], pml_p_names[1], par(xe), par(xh)):
+                pml_e_fields[ptr(name_e)] = pyopencl.array.to_device(self.queue, pe)
+                pml_h_fields[ptr(name_h)] = pyopencl.array.to_device(self.queue, ph)
+
+            uv = 'xyz'.replace(pml['axis'], '')
+            psi_base = 'Psi_' + pml['axis'] + pml['polarity'] + '_'
             psi_names = [[psi_base + eh + c for c in uv] for eh in 'EH']
 
             psi_shape = list(epsilon[0].shape)
-            psi_shape['xyz'.find(pml[0])] = pml_thickness
+            psi_shape[a] = pml['thickness']
 
             for ne, nh in zip(*psi_names):
                 pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)

From 6bd756c3d3ab8025ae3f317cc9e8e243987bf7a4 Mon Sep 17 00:00:00 2001
From: jan <jan@mpxd.net>
Date: Sun, 1 Oct 2017 12:37:56 -0700
Subject: [PATCH 06/33] add setup.py

---
 setup.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 setup.py

diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..a2f9f9d
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+
+from setuptools import setup, find_packages
+
+setup(name='opencl_fdtd',
+      version='0.4',
+      description='OpenCL FDTD solver',
+      author='Jan Petykiewicz',
+      author_email='anewusername@gmail.com',
+      url='https://mpxd.net/gogs/jan/opencl_fdtd',
+      packages=find_packages(),
+      package_data={
+          'opencl_fdfd': ['kernels/*']
+      },
+      install_requires=[
+            'numpy',
+            'pyopencl',
+            'jinja2',
+            'fdfd_tools>=0.3',
+      ],
+      extras_require={
+      },
+      )
+

From 9772f47a3482eb7a57872c0219d85753db8822ef Mon Sep 17 00:00:00 2001
From: jan <jan@mpxd.net>
Date: Sun, 1 Oct 2017 14:01:22 -0700
Subject: [PATCH 07/33] fix typo

---
 opencl_fdtd/kernels/update_e.cl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl
index f7304e7..9162b21 100644
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@@ -54,12 +54,12 @@ pml_{{r ~ p}}_thickness = {{pml['thickness']}};
 
     {%- if p == 'n' %}
 
-if ( {{r}} < pml_{{r ~ p}_thickness ) {
+if ( {{r}} < pml_{{r ~ p}}_thickness ) {
     const size_t ir = {{r}};                          // index into pml parameters
 
     {%- elif p == 'p' %}
 
-if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}_thickness ) {
+if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
     const size_t ir = (s{{r}} - 1) - {{r}};                     // index into pml parameters
 
     {%- endif %}

From 6a03977a699a1cf5035e48f10d9b02496b0978f5 Mon Sep 17 00:00:00 2001
From: jan <jan@mpxd.net>
Date: Sun, 1 Oct 2017 13:11:19 -0700
Subject: [PATCH 08/33] fix pml param names

---
 opencl_fdtd/kernels/update_e.cl | 4 ++--
 opencl_fdtd/kernels/update_h.cl | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl
index 9162b21..63a0c6f 100644
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@@ -64,8 +64,8 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
 
     {%- endif %}
     const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
-    {{psi ~ u}}[ip] = p0e{{p}}[ir] * {{psi ~ u}}[ip] + p1e{{p}}[ir] * dH{{v ~ r}};
-    {{psi ~ v}}[ip] = p0e{{p}}[ir] * {{psi ~ v}}[ip] + p1e{{p}}[ir] * dH{{u ~ r}};
+    {{psi ~ u}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1e{{p}}[ir] * dH{{v ~ r}};
+    {{psi ~ v}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1e{{p}}[ir] * dH{{u ~ r}};
     pE{{u}}i {{se}}= {{psi ~ u}}[ip];
     pE{{v}}i {{sh}}= {{psi ~ v}}[ip];
 }
diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index 7c648b6..cdda3a9 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -78,8 +78,8 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
 
     {%- endif %}
     const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
-    {{psi ~ u}}[ip] = p0h{{p}}[ir] * {{psi ~ u}}[ip] + p1h{{p}}[ir] * dE{{v ~ r}};
-    {{psi ~ v}}[ip] = p0h{{p}}[ir] * {{psi ~ v}}[ip] + p1h{{p}}[ir] * dE{{u ~ r}};
+    {{psi ~ u}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1h{{p}}[ir] * dE{{v ~ r}};
+    {{psi ~ v}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1h{{p}}[ir] * dE{{u ~ r}};
     pH{{u}}i {{sh}}= {{psi ~ u}}[ip];
     pH{{v}}i {{se}}= {{psi ~ v}}[ip];
 }

From a67009d1c889208f1d6ec5fe2d91788d24a898b5 Mon Sep 17 00:00:00 2001
From: jan <jan@mpxd.net>
Date: Sun, 1 Oct 2017 13:14:27 -0700
Subject: [PATCH 09/33] fix declaration

---
 opencl_fdtd/kernels/update_e.cl | 2 +-
 opencl_fdtd/kernels/update_h.cl | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl
index 63a0c6f..eed4ee5 100644
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@@ -50,7 +50,7 @@ ftype pEzi = 0;
         {%- set se, sh = '+', '-' -%}
     {%- endif -%}
 
-pml_{{r ~ p}}_thickness = {{pml['thickness']}};
+int pml_{{r ~ p}}_thickness = {{pml['thickness']}};
 
     {%- if p == 'n' %}
 
diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index cdda3a9..27d7a47 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -64,7 +64,7 @@ ftype pHzi = 0;
         {%- set se, sh = '+', '-' -%}
     {%- endif -%}
 
-pml_{{r ~ p}}_thickness = {{pml['thickness']}};
+int pml_{{r ~ p}}_thickness = {{pml['thickness']}};
 
     {%- if p == 'n' %}
 

From aab57412a5b1ab39a4f2426c697612f20f1f3221 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Mon, 15 Jan 2018 22:36:40 -0800
Subject: [PATCH 10/33] move code to new location

---
 README.md        | 8 ++++----
 requirements.txt | 8 ++++----
 setup.py         | 2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 26d5acd..731450b 100644
--- a/README.md
+++ b/README.md
@@ -27,16 +27,16 @@ electromagnetic simulations on parallel compute hardware (mainly GPUs).
 * numpy
 * pyopencl
 * jinja2
-* [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools)
+* [fdfd_tools](https://mpxd.net/code/jan/fdfd_tools)
 
 Optional (used for examples):
 * dill (for file output)
-* [gridlock](https://mpxd.net/gogs/jan/gridlock)
-* [masque](https://mpxd.net/gogs/jan/masque)
+* [gridlock](https://mpxd.net/code/jan/gridlock)
+* [masque](https://mpxd.net/code/jan/masque)
 
 To get the code, just clone this repository:
 ```bash
-git clone https://mpxd.net/gogs/jan/opencl_fdtd.git
+git clone https://mpxd.net/code/jan/opencl_fdtd.git
 ```
 
 You can install the requirements and their dependencies easily with
diff --git a/requirements.txt b/requirements.txt
index 6ed651b..6626171 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@ numpy
 h5py
 pyopencl
 jinja2
--e git+https://mpxd.net/gogs/jan/float_raster.git@release#egg=float_raster
--e git+https://mpxd.net/gogs/jan/gridlock.git@release#egg=gridlock
--e git+https://mpxd.net/gogs/jan/masque.git@release#egg=masque
--e git+https://mpxd.net/gogs/jan/fdfd_tools.git@master#egg=fdfd_tools
+-e git+https://mpxd.net/code/jan/float_raster.git@release#egg=float_raster
+-e git+https://mpxd.net/code/jan/gridlock.git@release#egg=gridlock
+-e git+https://mpxd.net/code/jan/masque.git@release#egg=masque
+-e git+https://mpxd.net/code/jan/fdfd_tools.git@master#egg=fdfd_tools
diff --git a/setup.py b/setup.py
index a2f9f9d..55f5af9 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@ setup(name='opencl_fdtd',
       description='OpenCL FDTD solver',
       author='Jan Petykiewicz',
       author_email='anewusername@gmail.com',
-      url='https://mpxd.net/gogs/jan/opencl_fdtd',
+      url='https://mpxd.net/code/jan/opencl_fdtd',
       packages=find_packages(),
       package_data={
           'opencl_fdfd': ['kernels/*']

From c8298d916fbec02925030d977288ab411d6d49c9 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 16 Sep 2018 20:12:26 -0700
Subject: [PATCH 11/33] Use python3 for setup.py

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 55f5af9..c7e55f3 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 from setuptools import setup, find_packages
 

From d0b303523e735193c021a7b7de64841dbcc47bc3 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 16 Sep 2018 20:12:41 -0700
Subject: [PATCH 12/33] Move version string into module

---
 opencl_fdtd/__init__.py | 4 ++++
 setup.py                | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/opencl_fdtd/__init__.py b/opencl_fdtd/__init__.py
index e3e95b4..b621c19 100644
--- a/opencl_fdtd/__init__.py
+++ b/opencl_fdtd/__init__.py
@@ -1 +1,5 @@
 from .simulation import Simulation, type_to_C
+
+__author__ = 'Jan Petykiewicz'
+
+version = '0.4'
diff --git a/setup.py b/setup.py
index c7e55f3..8b098db 100644
--- a/setup.py
+++ b/setup.py
@@ -1,9 +1,10 @@
 #!/usr/bin/env python3
 
 from setuptools import setup, find_packages
+import opencl_fdtd
 
 setup(name='opencl_fdtd',
-      version='0.4',
+      version=opencl_fdtd.version,
       description='OpenCL FDTD solver',
       author='Jan Petykiewicz',
       author_email='anewusername@gmail.com',

From 1de6fb0e39f39ad2b047116668ab44de0a5429e3 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 16 Sep 2018 20:12:52 -0700
Subject: [PATCH 13/33] Use readme as long_description

---
 setup.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/setup.py b/setup.py
index 8b098db..b7fd3cf 100644
--- a/setup.py
+++ b/setup.py
@@ -3,9 +3,14 @@
 from setuptools import setup, find_packages
 import opencl_fdtd
 
+with open('README.md', 'r') as f:
+    long_description = f.read()
+
 setup(name='opencl_fdtd',
       version=opencl_fdtd.version,
       description='OpenCL FDTD solver',
+      long_description=long_description,
+      long_description_content_type='text/markdown',
       author='Jan Petykiewicz',
       author_email='anewusername@gmail.com',
       url='https://mpxd.net/code/jan/opencl_fdtd',

From ea5e298023e74eb3c1865658cfdaedb1f06d9c58 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Thu, 29 Nov 2018 01:35:20 -0800
Subject: [PATCH 14/33] Explicitly cast to int

---
 opencl_fdtd/kernels/common.cl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/opencl_fdtd/kernels/common.cl b/opencl_fdtd/kernels/common.cl
index d203ca7..deecec9 100644
--- a/opencl_fdtd/kernels/common.cl
+++ b/opencl_fdtd/kernels/common.cl
@@ -73,9 +73,9 @@ __global ftype *Hz = H + ZZ;
  * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix .
  */
 {% for r in 'xyz' %}
-int m{{r}} = -di{{r}};
-int p{{r}} = +di{{r}};
-int wrap_{{r}} = (s{{r}} - 1) * di{{r}};
+int m{{r}} = - (int) di{{r}};
+int p{{r}} = + (int) di{{r}};
+int wrap_{{r}} = (s{{r}} - 1) * (int) di{{r}};
 if ( {{r}} == 0 ) {
   m{{r}} = wrap_{{r}};
 } else if ( {{r}} == s{{r}} - 1 ) {

From 1e874cb0c029cae2beeb4a0fae5d60de53106a83 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Thu, 29 Nov 2018 01:36:13 -0800
Subject: [PATCH 15/33] Fix sign on H component of PML

---
 opencl_fdtd/kernels/update_h.cl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index 27d7a47..968c2b6 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -96,9 +96,9 @@ ftype Hz_old = Hz[i];
 {%- endif %}
 
 // H update equations
-Hx[i] -= dt * (dEzy - dEyz + pHxi);
-Hy[i] -= dt * (dExz - dEzx + pHyi);
-Hz[i] -= dt * (dEyx - dExy + pHzi);
+Hx[i] -= dt * (dEzy - dEyz - pHxi);
+Hy[i] -= dt * (dExz - dEzx - pHyi);
+Hz[i] -= dt * (dEyx - dExy - pHzi);
 
 {% if do_poynting -%}
 // Average H across timesteps

From 2b1d906665c6394561b613ba62af832bbfb72ea3 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Thu, 29 Nov 2018 01:38:28 -0800
Subject: [PATCH 16/33] Add _create_field() and _create_eps()

---
 opencl_fdtd/simulation.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 78460ec..525d818 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -273,7 +273,24 @@ class Simulation(object):
                                          arguments=', '.join(S_args.keys()))
 
             self.update_S = lambda e: S_update(*S_args.values(), wait_for=e)
+    def _create_eps(self, epsilon: List[numpy.ndarray]):
+        if len(epsilon) != 3:
+            raise Exception('Epsilon must be a list with length of 3')
+        if not all((e.shape == epsilon[0].shape for e in epsilon[1:])):
+            raise Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
+        if not epsilon[0].shape == self.shape:
+            raise Exception('Epsilon shape mismatch. Expected {}, got {}'.format(self.shape, epsilon[0].shape))
+        self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(self.arg_type))
 
+    def _create_field(self, initial_value: List[numpy.ndarray] = None):
+        if initial_value is None:
+            return pyopencl.array.zeros_like(self.eps)
+        else:
+            if len(initial_value) != 3:
+                Exception('Initial field value must be a list of length 3')
+            if not all((f.shape == self.shape for f in initial_value)):
+                Exception('Initial field list elements must have same shape as epsilon elements')
+            return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type))
 
 def type_to_C(float_type) -> str:
     """

From f00c8b4a3eb921fc8ed118f8b7f240178a5f3c47 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Thu, 29 Nov 2018 01:59:05 -0800
Subject: [PATCH 17/33] Add _create_context(), _create_operation(), and
 _create_pmls(), and generalize initial field value args

---
 opencl_fdtd/simulation.py | 116 ++++++++++++++++----------------------
 1 file changed, 50 insertions(+), 66 deletions(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 525d818..7b94c64 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -76,8 +76,7 @@ class Simulation(object):
                  epsilon: List[numpy.ndarray],
                  pmls: List[Dict[str, int or float]],
                  dt: float = .99/numpy.sqrt(3),
-                 initial_E: List[numpy.ndarray] = None,
-                 initial_H: List[numpy.ndarray] = None,
+                 initial_fields: Dict[str, List[numpy.ndarray]] = None,
                  context: pyopencl.Context = None,
                  queue: pyopencl.CommandQueue = None,
                  float_type: numpy.float32 or numpy.float64 = numpy.float32,
@@ -113,21 +112,14 @@ class Simulation(object):
                     * GPU memory requirements are approximately doubled, since S and the intermediate
                         products must be stored.
         """
+        if initial_fields is None:
+            initial_fields = {}
 
-        if len(epsilon) != 3:
-            Exception('Epsilon must be a list with length of 3')
-        if not all((e.shape == epsilon[0].shape for e in epsilon[1:])):
-            Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
-
-        if context is None:
-            self.context = pyopencl.create_some_context()
-        else:
-            self.context = context
-
-        if queue is None:
-            self.queue = pyopencl.CommandQueue(self.context)
-        else:
-            self.queue = queue
+        self.shape = epsilon[0].shape
+        self.arg_type = float_type
+        self.sources = {}
+        self._create_context(context, queue)
+        self._create_eps(epsilon)
 
         if dt > .99/numpy.sqrt(3):
             warnings.warn('Warning: unstable dt: {}'.format(dt))
@@ -136,27 +128,8 @@ class Simulation(object):
         else:
             self.dt = dt
 
-        self.arg_type = float_type
-        self.sources = {}
-        self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(float_type))
-
-        if initial_E is None:
-            self.E = pyopencl.array.zeros_like(self.eps)
-        else:
-            if len(initial_E) != 3:
-                Exception('Initial_E must be a list of length 3')
-            if not all((E.shape == epsilon[0].shape for E in initial_E)):
-                Exception('Initial_E list elements must have same shape as epsilon elements')
-            self.E = pyopencl.array.to_device(self.queue, vec(E).astype(float_type))
-
-        if initial_H is None:
-            self.H = pyopencl.array.zeros_like(self.eps)
-        else:
-            if len(initial_H) != 3:
-                Exception('Initial_H must be a list of length 3')
-            if not all((H.shape == epsilon[0].shape for H in initial_H)):
-                Exception('Initial_H list elements must have same shape as epsilon elements')
-            self.H = pyopencl.array.to_device(self.queue, vec(H).astype(float_type))
+        self.E = self._create_field(initial_fields.get('E', None))
+        self.H = self._create_field(initial_fields.get('H', None))
 
         for pml in pmls:
             pml.setdefault('thickness', 8)
@@ -181,7 +154,7 @@ class Simulation(object):
 
         common_source = jinja_env.get_template('common.cl').render(
                 ftype=ctype,
-                shape=epsilon[0].shape,
+                shape=self.shape,
                 )
         jinja_args = {
                 'common_header': common_source,
@@ -194,21 +167,37 @@ class Simulation(object):
         self.sources['E'] = E_source
         self.sources['H'] = H_source
 
+
+
+        S_fields = OrderedDict()
         if do_poynting:
             S_source = jinja_env.get_template('update_s.cl').render(**jinja_args)
             self.sources['S'] = S_source
 
-            self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=float_type)
+            self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=self.arg_type)
             self.S = pyopencl.array.zeros_like(self.E)
-            S_fields = OrderedDict()
             S_fields[ptr('oS')] = self.oS
             S_fields[ptr('S')] = self.S
-        else:
-            S_fields = OrderedDict()
 
         '''
         PML
         '''
+        pml_e_fields, pml_h_fields = self._create_pmls(pmls)
+
+        '''
+        Create operations
+        '''
+        self.update_E = self._create_operation(E_source, (base_fields, eps_field, pml_e_fields))
+        self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields))
+        if do_poynting:
+            self.update_S = self._create_operation(S_source, (base_fields, S_fields))
+
+
+    def _create_pmls(self, pmls):
+        ctype = type_to_C(self.arg_type)
+        def ptr(arg: str) -> str:
+            return ctype + ' *' + arg
+
         pml_e_fields = OrderedDict()
         pml_h_fields = OrderedDict()
         for pml in pmls:
@@ -225,7 +214,7 @@ class Simulation(object):
                 p1 = sigma / (sigma + alpha) * (p0 - 1)
                 return p0, p1
 
-            xe, xh = (numpy.arange(1, pml['thickness'] + 1, dtype=float_type)[::-1] for _ in range(2))
+            xe, xh = (numpy.arange(1, pml['thickness'] + 1, dtype=self.arg_type)[::-1] for _ in range(2))
             if pml['polarity'] == 'p':
                 xe -= 0.5
             elif pml['polarity'] == 'n':
@@ -240,39 +229,34 @@ class Simulation(object):
             psi_base = 'Psi_' + pml['axis'] + pml['polarity'] + '_'
             psi_names = [[psi_base + eh + c for c in uv] for eh in 'EH']
 
-            psi_shape = list(epsilon[0].shape)
+            psi_shape = list(self.shape)
             psi_shape[a] = pml['thickness']
 
             for ne, nh in zip(*psi_names):
                 pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
                 pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
+        return pml_e_fields, pml_h_fields
 
-        self.pml_e_fields = pml_e_fields
-        self.pml_h_fields = pml_h_fields
+    def _create_operation(self, source, args_fields):
+        args = OrderedDict()
+        [args.update(d) for d in args_fields]
+        update = ElementwiseKernel(self.context, operation=source,
+                                   arguments=', '.join(args.keys()))
+        return lambda e: update(*args.values(), wait_for=e)
 
 
-        '''
-        Create operations
-        '''
-        E_args = OrderedDict()
-        [E_args.update(d) for d in (base_fields, eps_field, pml_e_fields)]
-        E_update = ElementwiseKernel(self.context, operation=E_source,
-                                     arguments=', '.join(E_args.keys()))
+    def _create_context(self, context: pyopencl.Context = None,
+                        queue: pyopencl.CommandQueue = None):
+        if context is None:
+            self.context = pyopencl.create_some_context()
+        else:
+            self.context = context
 
-        H_args = OrderedDict()
-        [H_args.update(d) for d in (base_fields, pml_h_fields, S_fields)]
-        H_update = ElementwiseKernel(self.context, operation=H_source,
-                                     arguments=', '.join(H_args.keys()))
-        self.update_E = lambda e: E_update(*E_args.values(), wait_for=e)
-        self.update_H = lambda e: H_update(*H_args.values(), wait_for=e)
+        if queue is None:
+            self.queue = pyopencl.CommandQueue(self.context)
+        else:
+            self.queue = queue
 
-        if do_poynting:
-            S_args = OrderedDict()
-            [S_args.update(d) for d in (base_fields, S_fields)]
-            S_update = ElementwiseKernel(self.context, operation=S_source,
-                                         arguments=', '.join(S_args.keys()))
-
-            self.update_S = lambda e: S_update(*S_args.values(), wait_for=e)
     def _create_eps(self, epsilon: List[numpy.ndarray]):
         if len(epsilon) != 3:
             raise Exception('Epsilon must be a list with length of 3')

From cb471df1827ac726e3a36d72a9868177897b90ba Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Thu, 29 Nov 2018 02:00:30 -0800
Subject: [PATCH 18/33] Implement proper kappa for PML

---
 opencl_fdtd/kernels/update_e.cl |  2 ++
 opencl_fdtd/kernels/update_h.cl |  4 ++++
 opencl_fdtd/simulation.py       | 19 +++++++++++--------
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl
index eed4ee5..5a9ed1a 100644
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@@ -64,6 +64,8 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
 
     {%- endif %}
     const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
+    dH{{v ~ r}} *= p{{r}}2e{{p}}[ir];
+    dH{{u ~ r}} *= p{{r}}2e{{p}}[ir];
     {{psi ~ u}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1e{{p}}[ir] * dH{{v ~ r}};
     {{psi ~ v}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1e{{p}}[ir] * dH{{u ~ r}};
     pE{{u}}i {{se}}= {{psi ~ u}}[ip];
diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index 968c2b6..56d9bbc 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -45,6 +45,8 @@ ftype aEzy = Ez[i + py] + Ez[i];
 {%- endif %}
 
 
+
+
 /*
  *  PML Update
  */
@@ -78,6 +80,8 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
 
     {%- endif %}
     const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
+    dE{{v ~ r}} *= p{{r}}2h{{p}}[ir];
+    dE{{u ~ r}} *= p{{r}}2h{{p}}[ir];
     {{psi ~ u}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1h{{p}}[ir] * dE{{v ~ r}};
     {{psi ~ v}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1h{{p}}[ir] * dE{{u ~ r}};
     pH{{u}}i {{sh}}= {{psi ~ u}}[ip];
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 7b94c64..1b6d9a2 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -203,16 +203,19 @@ class Simulation(object):
         for pml in pmls:
             a = 'xyz'.find(pml['axis'])
 
-            sigma_max = -pml['ln_R_per_layer'] / 2 * (pml['m'] + 1) / \
-                    numpy.sqrt(pml['epsilon_eff'] * pml['mu_eff'])
-            alpha_max = 0           # TODO: Nonzero alpha
+            sigma_max = -pml['ln_R_per_layer'] / 2 * (pml['m'] + 1)
+            kappa_max = numpy.sqrt(pml['mu_eff'] * pml['epsilon_eff'])
+            alpha_max = 0           # TODO: Nonzero alpha?
 
             def par(x):
-                sigma = ((x / pml['thickness']) ** pml['m']) * sigma_max
+                scaling = ((x / (pml['thickness'])) ** pml['m'])
+                sigma = scaling * sigma_max
+                kappa = 1 + scaling * (kappa_max - 1)
                 alpha = ((1 - x / pml['thickness']) ** pml['ma']) * alpha_max
-                p0 = numpy.exp(-(sigma + alpha) * dt)
-                p1 = sigma / (sigma + alpha) * (p0 - 1)
-                return p0, p1
+                p0 = numpy.exp(-(sigma / kappa + alpha) * self.dt)
+                p1 = sigma / (sigma + kappa * alpha) * (p0 - 1)
+                p2 = 1/kappa
+                return p0, p1, p2
 
             xe, xh = (numpy.arange(1, pml['thickness'] + 1, dtype=self.arg_type)[::-1] for _ in range(2))
             if pml['polarity'] == 'p':
@@ -220,7 +223,7 @@ class Simulation(object):
             elif pml['polarity'] == 'n':
                 xh -= 0.5
 
-            pml_p_names = [['p' + pml['axis'] + i + eh + pml['polarity'] for i in '01'] for eh in 'eh']
+            pml_p_names = [['p' + pml['axis'] + i + eh + pml['polarity'] for i in '012'] for eh in 'eh']
             for name_e, name_h, pe, ph in zip(pml_p_names[0], pml_p_names[1], par(xe), par(xh)):
                 pml_e_fields[ptr(name_e)] = pyopencl.array.to_device(self.queue, pe)
                 pml_h_fields[ptr(name_h)] = pyopencl.array.to_device(self.queue, ph)

From d8dc024626f872d2a9310745c96d02413b1e1982 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Thu, 29 Nov 2018 02:01:28 -0800
Subject: [PATCH 19/33] Implementation of "source" fields (J)

---
 opencl_fdtd/kernels/update_j.cl | 32 ++++++++++++++++++++++++++++++++
 opencl_fdtd/simulation.py       | 24 +++++++++++++++++++++++-
 2 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100644 opencl_fdtd/kernels/update_j.cl

diff --git a/opencl_fdtd/kernels/update_j.cl b/opencl_fdtd/kernels/update_j.cl
new file mode 100644
index 0000000..4d7f64d
--- /dev/null
+++ b/opencl_fdtd/kernels/update_j.cl
@@ -0,0 +1,32 @@
+/*
+ *  Update E-field from J field 
+ *
+ *  Template parameters:
+ *   common_header: Rendered contents of common.cl
+ *
+ *  OpenCL args:
+ *   E, Jr, Ji, c, s, xmin, xmax, ymin, ymax, zmin, zmax   
+ */
+
+{{common_header}}
+
+////////////////////////////////////////////////////////////////////////////
+
+__global ftype *Jrx = Jr + XX;
+__global ftype *Jry = Jr + YY;
+__global ftype *Jrz = Jr + ZZ;
+__global ftype *Jix = Ji + XX;
+__global ftype *Jiy = Ji + YY;
+__global ftype *Jiz = Ji + ZZ;
+
+
+if (x < xmin || y < ymin || z < zmin) {
+   PYOPENCL_ELWISE_CONTINUE;
+}
+if (x >= xmax || y >= ymax || z >= zmax) {
+   PYOPENCL_ELWISE_CONTINUE;
+}
+
+Ex[i] += c * Jrx[i] + s * Jix[i];
+Ey[i] += c * Jry[i] + s * Jiy[i];
+Ez[i] += c * Jrz[i] + s * Jiz[i];
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 1b6d9a2..6e12b2e 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -70,6 +70,7 @@ class Simulation(object):
     update_E = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
     update_H = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
     update_S = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
+    update_J = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
     sources = None      # type: Dict[str, str]
 
     def __init__(self,
@@ -80,7 +81,8 @@ class Simulation(object):
                  context: pyopencl.Context = None,
                  queue: pyopencl.CommandQueue = None,
                  float_type: numpy.float32 or numpy.float64 = numpy.float32,
-                 do_poynting: bool = True):
+                 do_poynting: bool = True,
+                 do_fieldsrc: bool = False):
         """
         Initialize the simulation.
 
@@ -179,6 +181,17 @@ class Simulation(object):
             S_fields[ptr('oS')] = self.oS
             S_fields[ptr('S')] = self.S
 
+        J_fields = OrderedDict()
+        if do_fieldsrc:
+            J_source = jinja_env.get_template('update_j.cl').render(**jinja_args)
+            self.sources['J'] = J_source
+
+            self.Ji = pyopencl.array.zeros_like(self.E)
+            self.Jr = pyopencl.array.zeros_like(self.E)
+            J_fields[ptr('Jr')] = self.Jr
+            J_fields[ptr('Ji')] = self.Ji
+
+
         '''
         PML
         '''
@@ -191,6 +204,15 @@ class Simulation(object):
         self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields))
         if do_poynting:
             self.update_S = self._create_operation(S_source, (base_fields, S_fields))
+        if do_fieldsrc:
+            args = OrderedDict()
+            [args.update(d) for d in (base_fields, J_fields)]
+            var_args = [ctype + ' ' + v for v in 'cs'] + ['uint ' + r + m for r in 'xyz' for m in ('min', 'max')]
+            print(var_args)
+            update = ElementwiseKernel(self.context, operation=J_source,
+                                       arguments=', '.join(list(args.keys()) + var_args))
+            #print(len(args.values()),'\n\n',  args.values(), args.keys())
+            self.update_J = lambda e, *a: update(*args.values(), *a, wait_for=e)
 
 
     def _create_pmls(self, pmls):

From 3ac20f62718f52d3ea710fb4fb24d135eb80faf7 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Thu, 29 Nov 2018 02:01:48 -0800
Subject: [PATCH 20/33] Add bloch boundaries (untested)

---
 opencl_fdtd/kernels/update_e.cl | 12 ++++++++++++
 opencl_fdtd/kernels/update_h.cl | 15 +++++++++++++++
 opencl_fdtd/simulation.py       | 33 ++++++++++++++++++++++++++++++++-
 3 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl
index 5a9ed1a..c578414 100644
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@@ -31,6 +31,18 @@ ftype dHyz = Hy[i] - Hy[i + mz];
 ftype dHzx = Hz[i] - Hz[i + mx];
 ftype dHzy = Hz[i] - Hz[i + my];
 
+{% for bloch in bloch_boundaries -%}
+    {%- set r = bloch['axis'] -%}
+    {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
+if ({{r}} == 0) {
+    ftype bloch_im = {{bloch['real']}};
+    ftype bloch_re = {{bloch['imag']}};
+    dH{{u ~ r}} = bloch_re * dH{{v ~ r}} + bloch_im * (G{{u}}[i] - G{{u}}[i + m{{u}}]);
+    dH{{v ~ r}} = bloch_re * dH{{v ~ r}} + bloch_im * (G{{v}}[i] - G{{v}}[i + m{{v}}]);
+}
+{%- endfor %}
+
+
 /*
  *   PML Update
  */
diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index 56d9bbc..0ef15dd 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -28,6 +28,21 @@ ftype dEyz = Ey[i + pz] - Ey[i];
 ftype dEzx = Ez[i + px] - Ez[i];
 ftype dEzy = Ez[i + py] - Ez[i];
 
+
+{% for bloch in bloch_boundaries -%}
+    {%- set r = bloch['axis'] -%}
+    {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
+if ({{r}} == s{{r}} - 1) {
+    ftype bloch_re = {{bloch['real']}};
+    ftype bloch_im = {{bloch['imag']}};
+    dE{{u ~ r}} = bloch_re * dE{{u ~ r}} + bloch_im * (F{{u}}[i + p{{u}}] - F{{u}}[i]);
+    dE{{v ~ r}} = bloch_re * dE{{v ~ r}} + bloch_im * (F{{v}}[i + p{{v}}] - F{{v}}[i]);
+}
+{%- endfor %}
+
+
+
+
 {%- if do_poynting %}
 
 
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 6e12b2e..9b23cad 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -76,6 +76,7 @@ class Simulation(object):
     def __init__(self,
                  epsilon: List[numpy.ndarray],
                  pmls: List[Dict[str, int or float]],
+                 bloch_boundaries: List[Dict[str, int or float]] = (),
                  dt: float = .99/numpy.sqrt(3),
                  initial_fields: Dict[str, List[numpy.ndarray]] = None,
                  context: pyopencl.Context = None,
@@ -132,6 +133,9 @@ class Simulation(object):
 
         self.E = self._create_field(initial_fields.get('E', None))
         self.H = self._create_field(initial_fields.get('H', None))
+        if bloch_boundaries:
+            self.F = self._create_field(initial_fields.get('F', None))
+            self.G = self._create_field(initial_fields.get('G', None))
 
         for pml in pmls:
             pml.setdefault('thickness', 8)
@@ -154,6 +158,17 @@ class Simulation(object):
         eps_field = OrderedDict()
         eps_field[ptr('eps')] = self.eps
 
+        if bloch_boundaries:
+            base_fields[ptr('F')] = self.F
+            base_fields[ptr('G')] = self.G
+
+            bloch_fields = OrderedDict()
+            bloch_fields[ptr('E')] = self.F
+            bloch_fields[ptr('H')] = self.G
+            bloch_fields[ctype + ' dt'] = self.dt
+            bloch_fields[ptr('F')] = self.E
+            bloch_fields[ptr('G')] = self.H
+
         common_source = jinja_env.get_template('common.cl').render(
                 ftype=ctype,
                 shape=self.shape,
@@ -162,13 +177,24 @@ class Simulation(object):
                 'common_header': common_source,
                 'pmls': pmls,
                 'do_poynting': do_poynting,
+                'bloch': bloch_boundaries,
                 }
         E_source = jinja_env.get_template('update_e.cl').render(**jinja_args)
         H_source = jinja_env.get_template('update_h.cl').render(**jinja_args)
-
         self.sources['E'] = E_source
         self.sources['H'] = H_source
 
+        if bloch_boundaries:
+            bloch_args = jinja_args.copy()
+            bloch_args['do_poynting'] = False
+            bloch_args['bloch'] = [{'axis': b['axis'],
+                                    'real': b['imag'],
+                                    'imag': b['real']}
+                                   for b in bloch_boundaries]
+            F_source = jinja_env.get_template('update_e.cl').render(**bloch_args)
+            G_source = jinja_env.get_template('update_h.cl').render(**bloch_args)
+            self.sources['F'] = F_source
+            self.sources['G'] = G_source
 
 
         S_fields = OrderedDict()
@@ -196,6 +222,8 @@ class Simulation(object):
         PML
         '''
         pml_e_fields, pml_h_fields = self._create_pmls(pmls)
+        if bloch_boundaries:
+            pml_f_fields, pml_g_fields = self._create_pmls(pmls)
 
         '''
         Create operations
@@ -204,6 +232,9 @@ class Simulation(object):
         self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields))
         if do_poynting:
             self.update_S = self._create_operation(S_source, (base_fields, S_fields))
+        if bloch_boundaries:
+            self.update_F = self._create_operation(F_source, (bloch_fields, eps_field, pml_f_fields))
+            self.update_G = self._create_operation(G_source, (bloch_fields, pml_g_fields))
         if do_fieldsrc:
             args = OrderedDict()
             [args.update(d) for d in (base_fields, J_fields)]

From 5ffe547640c78af55a7a8e802f8a503cee21e950 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Fri, 30 Nov 2018 00:13:27 -0800
Subject: [PATCH 21/33] Typo fixes and minor comment updates

---
 opencl_fdtd/kernels/update_e.cl |  4 ++--
 opencl_fdtd/kernels/update_h.cl |  2 +-
 opencl_fdtd/simulation.py       | 10 +++++-----
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl
index c578414..f44527d 100644
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@@ -4,7 +4,7 @@
  *  Template parameters:
  *   common_header: Rendered contents of common.cl
  *   pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
-        axes, polarities, and thicknesses.
+ *      axes, polarities, and thicknesses.
  *
  *  OpenCL args:
  *   E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E]
@@ -20,7 +20,7 @@ __global ftype *epsz = eps + ZZ;
 
 
 /*
- *   Precalclate derivatives
+ *   Precalculate derivatives
  */
 ftype dHxy = Hx[i] - Hx[i + my];
 ftype dHxz = Hx[i] - Hx[i + mz];
diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index 0ef15dd..f1c19cb 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -5,7 +5,7 @@
  *  Template parameters:
  *   common_header: Rendered contents of common.cl
  *   pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
-        axes, polarities, and thicknesses.
+ *      axes, polarities, and thicknesses.
  *   do_poynting: Whether to precalculate poynting vector components (boolean)
  *
  *  OpenCL args:
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 9b23cad..0eaef5d 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -56,11 +56,11 @@ class Simulation(object):
      event0 and event1 to occur (i.e. previous operations to finish) before starting execution.
      event2 can then be used to prepare further operations to be run after update_H.
     """
-    E = None    # type: List[pyopencl.array.Array]
-    H = None    # type: List[pyopencl.array.Array]
-    S = None    # type: List[pyopencl.array.Array]
-    eps = None  # type: List[pyopencl.array.Array]
-    dt = None   # type: float
+    E = None            # type: pyopencl.array.Array
+    H = None            # type: pyopencl.array.Array
+    S = None            # type: pyopencl.array.Array
+    eps = None          # type: pyopencl.array.Array
+    dt = None           # type: float
 
     arg_type = None     # type: numpy.float32 or numpy.float64
 

From f5a5a0ad04176e8ebbca15c8e4a53300e5b2f5e6 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Fri, 30 Nov 2018 00:36:11 -0800
Subject: [PATCH 22/33] Enable nonuniform grids (minimally tested)

---
 opencl_fdtd/kernels/update_e.cl | 28 +++++++++++++++++++++-------
 opencl_fdtd/kernels/update_h.cl | 28 +++++++++++++++++++++-------
 opencl_fdtd/simulation.py       | 28 +++++++++++++++++++++++++---
 3 files changed, 67 insertions(+), 17 deletions(-)

diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl
index f44527d..9127181 100644
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@@ -5,9 +5,12 @@
  *   common_header: Rendered contents of common.cl
  *   pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
  *      axes, polarities, and thicknesses.
+ *   uniform_dx: If grid is uniform, uniform_dx should be the grid spacing.
+ *      Otherwise, uniform_dx should be False and [inv_dh{xyz}] arrays must be supplied as
+ *      OpenCL args.
  *
  *  OpenCL args:
- *   E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E]
+ *   E, H, dt, eps, [p{012}e{np}, Psi_{xyz}{np}_E], [inv_dh{xyz}]
  */
 
 {{common_header}}
@@ -19,17 +22,28 @@ __global ftype *epsy = eps + YY;
 __global ftype *epsz = eps + ZZ;
 
 
+{%- if uniform_dx %}
+ftype inv_dx = 1.0 / {{uniform_dx}};
+ftype inv_dy = 1.0 / {{uniform_dx}};
+ftype inv_dz = 1.0 / {{uniform_dx}};
+{%- else %}
+ftype inv_dx = inv_dhx[x];
+ftype inv_dy = inv_dhy[y];
+ftype inv_dz = inv_dhz[z];
+{%- endif %}
+
+
 /*
  *   Precalculate derivatives
  */
-ftype dHxy = Hx[i] - Hx[i + my];
-ftype dHxz = Hx[i] - Hx[i + mz];
+ftype dHxy = (Hx[i] - Hx[i + my]) * inv_dy;
+ftype dHxz = (Hx[i] - Hx[i + mz]) * inv_dz;
 
-ftype dHyx = Hy[i] - Hy[i + mx];
-ftype dHyz = Hy[i] - Hy[i + mz];
+ftype dHyx = (Hy[i] - Hy[i + mx]) * inv_dx;
+ftype dHyz = (Hy[i] - Hy[i + mz]) * inv_dz;
 
-ftype dHzx = Hz[i] - Hz[i + mx];
-ftype dHzy = Hz[i] - Hz[i + my];
+ftype dHzx = (Hz[i] - Hz[i + mx]) * inv_dx;
+ftype dHzy = (Hz[i] - Hz[i + my]) * inv_dy;
 
 {% for bloch in bloch_boundaries -%}
     {%- set r = bloch['axis'] -%}
diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index f1c19cb..ac75157 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -7,9 +7,12 @@
  *   pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
  *      axes, polarities, and thicknesses.
  *   do_poynting: Whether to precalculate poynting vector components (boolean)
+ *   uniform_dx: If grid is uniform, uniform_dx should be the grid spacing.
+ *      Otherwise, uniform_dx should be False and [inv_de{xyz}] arrays must be supplied as
+ *      OpenCL args.
  *
  *  OpenCL args:
- *   E, H, dt, [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS]
+ *   E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS]
  */
 
 {{common_header}}
@@ -19,14 +22,25 @@
 /*
  *  Precalculate derivatives
  */
-ftype dExy = Ex[i + py] - Ex[i];
-ftype dExz = Ex[i + pz] - Ex[i];
+{%- if uniform_dx %}
+ftype inv_dx = 1.0 / {{uniform_dx}};
+ftype inv_dy = 1.0 / {{uniform_dx}};
+ftype inv_dz = 1.0 / {{uniform_dx}};
+{%- else %}
+ftype inv_dx = inv_dex[x];
+ftype inv_dy = inv_dey[y];
+ftype inv_dz = inv_dez[z];
+{%- endif %}
 
-ftype dEyx = Ey[i + px] - Ey[i];
-ftype dEyz = Ey[i + pz] - Ey[i];
 
-ftype dEzx = Ez[i + px] - Ez[i];
-ftype dEzy = Ez[i + py] - Ez[i];
+ftype dExy = (Ex[i + py] - Ex[i]) * inv_dy;
+ftype dExz = (Ex[i + pz] - Ex[i]) * inv_dz;
+
+ftype dEyx = (Ey[i + px] - Ey[i]) * inv_dx;
+ftype dEyz = (Ey[i + pz] - Ey[i]) * inv_dz;
+
+ftype dEzx = (Ez[i + px] - Ez[i]) * inv_dx;
+ftype dEzy = (Ez[i + py] - Ez[i]) * inv_dy;
 
 
 {% for bloch in bloch_boundaries -%}
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 0eaef5d..940725a 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -61,6 +61,7 @@ class Simulation(object):
     S = None            # type: pyopencl.array.Array
     eps = None          # type: pyopencl.array.Array
     dt = None           # type: float
+    inv_dxes = None     # type: List[pyopencl.array.Array]
 
     arg_type = None     # type: numpy.float32 or numpy.float64
 
@@ -77,7 +78,8 @@ class Simulation(object):
                  epsilon: List[numpy.ndarray],
                  pmls: List[Dict[str, int or float]],
                  bloch_boundaries: List[Dict[str, int or float]] = (),
-                 dt: float = .99/numpy.sqrt(3),
+                 dxes: List[List[numpy.ndarray]] or float = None,
+                 dt: float = None,
                  initial_fields: Dict[str, List[numpy.ndarray]] = None,
                  context: pyopencl.Context = None,
                  queue: pyopencl.CommandQueue = None,
@@ -98,7 +100,7 @@ class Simulation(object):
             'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6.
             'm': Polynomial grading exponent. Default 3.5.
             'ma': Exponent for alpha. Default 1.
-        :param dt: Time step. Default is .99/sqrt(3).
+        :param dt: Time step. Default is min(dxes) * .99/sqrt(3).
         :param initial_E: Initial E-field (default is 0 everywhere). Same format as epsilon.
         :param initial_H: Initial H-field (default is 0 everywhere). Same format as epsilon.
         :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
@@ -124,7 +126,22 @@ class Simulation(object):
         self._create_context(context, queue)
         self._create_eps(epsilon)
 
-        if dt > .99/numpy.sqrt(3):
+        if dxes is None:
+            dxes = 1.0
+
+        if isinstance(dxes, (float, int)):
+            uniform_dx = dxes
+            min_dx = dxes
+        else:
+            uniform_dx = False
+            self.inv_dxes = [self._create_field(1 / dxn) for dxn in dxes[0] + dxes[1]]
+            min_dx = min(min(dxn) for dxn in dxes[0] + dxes[1])
+
+        max_dt = min_dx * .99 / numpy.sqrt(3)
+
+        if dt is None:
+            self.dt = max_dt
+        elif dt > max_dt:
             warnings.warn('Warning: unstable dt: {}'.format(dt))
         elif dt <= 0:
             raise Exception('Invalid dt: {}'.format(dt))
@@ -154,6 +171,10 @@ class Simulation(object):
         base_fields[ptr('E')] = self.E
         base_fields[ptr('H')] = self.H
         base_fields[ctype + ' dt'] = self.dt
+        if uniform_dx == False:
+            inv_dx_names = ['inv_d' + eh + r for eh in 'eh' for r in 'xyz']
+            for name, field in zip(inv_dx_names, self.inv_dxes):
+                base_fields[ptr(name)] = field
 
         eps_field = OrderedDict()
         eps_field[ptr('eps')] = self.eps
@@ -178,6 +199,7 @@ class Simulation(object):
                 'pmls': pmls,
                 'do_poynting': do_poynting,
                 'bloch': bloch_boundaries,
+                'uniform_dx': uniform_dx,
                 }
         E_source = jinja_env.get_template('update_e.cl').render(**jinja_args)
         H_source = jinja_env.get_template('update_h.cl').render(**jinja_args)

From 65cc1183921b6107b8487fc762a1606bb6512d8a Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Fri, 30 Nov 2018 00:36:22 -0800
Subject: [PATCH 23/33] Remove debug code

---
 opencl_fdtd/simulation.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 940725a..087e9c4 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -261,10 +261,8 @@ class Simulation(object):
             args = OrderedDict()
             [args.update(d) for d in (base_fields, J_fields)]
             var_args = [ctype + ' ' + v for v in 'cs'] + ['uint ' + r + m for r in 'xyz' for m in ('min', 'max')]
-            print(var_args)
             update = ElementwiseKernel(self.context, operation=J_source,
                                        arguments=', '.join(list(args.keys()) + var_args))
-            #print(len(args.values()),'\n\n',  args.values(), args.keys())
             self.update_J = lambda e, *a: update(*args.values(), *a, wait_for=e)
 
 

From 9cf50b1d88f2745cbf8f8215e23ec0610de67b84 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Fri, 30 Nov 2018 00:36:32 -0800
Subject: [PATCH 24/33] Enable nonzero pml alpha

---
 opencl_fdtd/simulation.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 087e9c4..d24dcf2 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -160,6 +160,7 @@ class Simulation(object):
             pml.setdefault('mu_eff', 1.0)
             pml.setdefault('ln_R_per_layer', -1.6)
             pml.setdefault('m', 3.5)
+            pml.setdefault('cfs_alpha', 0)
             pml.setdefault('ma', 1)
 
         ctype = type_to_C(self.arg_type)
@@ -278,7 +279,7 @@ class Simulation(object):
 
             sigma_max = -pml['ln_R_per_layer'] / 2 * (pml['m'] + 1)
             kappa_max = numpy.sqrt(pml['mu_eff'] * pml['epsilon_eff'])
-            alpha_max = 0           # TODO: Nonzero alpha?
+            alpha_max = pml['cfs_alpha']
 
             def par(x):
                 scaling = ((x / (pml['thickness'])) ** pml['m'])

From 60b70bb332d29fb5a2aecb801517be659cf4daf9 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Fri, 30 Nov 2018 00:37:07 -0800
Subject: [PATCH 25/33] Add restrict keyword to pointers (not sharing the same
 memory for multiple fields)

---
 opencl_fdtd/simulation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index d24dcf2..1e573c9 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -166,7 +166,7 @@ class Simulation(object):
         ctype = type_to_C(self.arg_type)
 
         def ptr(arg: str) -> str:
-            return ctype + ' *' + arg
+            return ctype + ' * restrict ' + arg
 
         base_fields = OrderedDict()
         base_fields[ptr('E')] = self.E

From a76e741d325d26ff5968904ae12f4ed95effd313 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Fri, 30 Nov 2018 00:37:16 -0800
Subject: [PATCH 26/33] Minor spacing changes

---
 opencl_fdtd/simulation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 1e573c9..988e45b 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -321,7 +321,6 @@ class Simulation(object):
                                    arguments=', '.join(args.keys()))
         return lambda e: update(*args.values(), wait_for=e)
 
-
     def _create_context(self, context: pyopencl.Context = None,
                         queue: pyopencl.CommandQueue = None):
         if context is None:
@@ -353,6 +352,7 @@ class Simulation(object):
                 Exception('Initial field list elements must have same shape as epsilon elements')
             return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type))
 
+
 def type_to_C(float_type) -> str:
     """
     Returns a string corresponding to the C equivalent of a numpy type.

From 0f0f16e1846489ac1ab4c4126167c5b0b3fbb0fe Mon Sep 17 00:00:00 2001
From: jan <jan@mpxd.net>
Date: Fri, 30 Nov 2018 01:03:10 -0800
Subject: [PATCH 27/33] Minor formatting changes

---
 opencl_fdtd/kernels/update_h.cl | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl
index ac75157..187a18a 100644
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@@ -52,10 +52,7 @@ if ({{r}} == s{{r}} - 1) {
     dE{{u ~ r}} = bloch_re * dE{{u ~ r}} + bloch_im * (F{{u}}[i + p{{u}}] - F{{u}}[i]);
     dE{{v ~ r}} = bloch_re * dE{{v ~ r}} + bloch_im * (F{{v}}[i + p{{v}}] - F{{v}}[i]);
 }
-{%- endfor %}
-
-
-
+{% endfor -%}
 
 {%- if do_poynting %}
 
@@ -93,7 +90,7 @@ ftype pHzi = 0;
         {%- set se, sh = '-', '+' -%}
     {%- else -%}
         {%- set se, sh = '+', '-' -%}
-    {%- endif -%}
+    {%- endif %}
 
 int pml_{{r ~ p}}_thickness = {{pml['thickness']}};
 

From 385e2859a9620cefe3536bd2eea9f5a051c2e9b2 Mon Sep 17 00:00:00 2001
From: jan <jan@mpxd.net>
Date: Fri, 30 Nov 2018 01:03:20 -0800
Subject: [PATCH 28/33] Update parameter descriptions

---
 opencl_fdtd/simulation.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 988e45b..9dd90c0 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -100,9 +100,13 @@ class Simulation(object):
             'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6.
             'm': Polynomial grading exponent. Default 3.5.
             'ma': Exponent for alpha. Default 1.
+        :param bloch_boundaries: List of dicts with keys:
+            'axis': One of 'x', 'y', 'z'.
+            'real': Real part of bloch phase factor (i.e. real(exp(i * phase)))
+            'imag': Imaginary part of bloch phase factor (i.e. imag(exp(i * phase)))
         :param dt: Time step. Default is min(dxes) * .99/sqrt(3).
-        :param initial_E: Initial E-field (default is 0 everywhere). Same format as epsilon.
-        :param initial_H: Initial H-field (default is 0 everywhere). Same format as epsilon.
+        :param initial_fields: Dict with optional keys ('E', 'H', 'F', 'G') containing initial values for the
+            specified fields (default is 0 everywhere). Fields have same format as epsilon.
         :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
         :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
         :param float_type: numpy.float32 or numpy.float64. Default numpy.float32.

From 582dafbc2fefce0bcc38b524a715e99f9c5ac89b Mon Sep 17 00:00:00 2001
From: jan <jan@mpxd.net>
Date: Fri, 30 Nov 2018 01:04:00 -0800
Subject: [PATCH 29/33] Update example with bloch fields

---
 fdtd.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/fdtd.py b/fdtd.py
index 08606c6..99af07b 100644
--- a/fdtd.py
+++ b/fdtd.py
@@ -128,7 +128,9 @@ def main():
     # #### Create the simulation grid ####
     pmls = [{'axis': a, 'polarity': p, 'thickness': pml_thickness}
             for a in 'xyz' for p in 'np']
-    sim = Simulation(grid.grids, do_poynting=True, pmls=pmls)
+    #bloch = [{'axis': a, 'real': 1, 'imag': 0} for a in 'x']
+    bloch = []
+    sim = Simulation(grid.grids, do_poynting=True, pmls=pmls, bloch_boundaries=bloch)
 
     # Source parameters and function
     w = 2 * numpy.pi * dx / wl
@@ -142,22 +144,32 @@ def main():
 
     with open('sources.c', 'w') as f:
         f.write(sim.sources['E'])
-        f.write('\n==========================================\n')
+        f.write('\n====================H======================\n')
         f.write(sim.sources['H'])
         if sim.update_S:
-            f.write('\n==========================================\n')
+            f.write('\n=====================S=====================\n')
             f.write(sim.sources['S'])
+        if bloch:
+            f.write('\n=====================F=====================\n')
+            f.write(sim.sources['F'])
+            f.write('\n=====================G=====================\n')
+            f.write(sim.sources['G'])
 
     # #### Run a bunch of iterations ####
     # event = sim.whatever([prev_event]) indicates that sim.whatever should be queued
     #  immediately and run once prev_event is finished.
     start = time.perf_counter()
     for t in range(max_t):
-        sim.update_E([]).wait()
+        e = sim.update_E([])
+        if bloch:
+            e = sim.update_F([e])
+        e.wait()
 
         ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + numpy.prod(grid.shape)
         sim.E[ind] += field_source(t)
         e = sim.update_H([])
+        if bloch:
+            e = sim.update_G([e])
         if sim.update_S:
             e = sim.update_S([e])
         e.wait()

From a37df3a74f10837c2c0a65cc8ac412cd7aba26bf Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 14 Jul 2019 23:48:13 -0700
Subject: [PATCH 30/33] expand gitignores

---
 .gitignore | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 3b93ce0..5298f42 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,9 @@
 .idea/
-__pycache__
 *.h5
-*.pyc
+
+__pycache__
+*.py[cod]
+build/
+dist/
+*.egg-info/
 

From 314e36a3cc3b16fb3250a5a22b570ed84f4f13f7 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 14 Jul 2019 23:50:11 -0700
Subject: [PATCH 31/33] fix for 1-sized grids

---
 opencl_fdtd/kernels/common.cl | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/opencl_fdtd/kernels/common.cl b/opencl_fdtd/kernels/common.cl
index deecec9..8c58d48 100644
--- a/opencl_fdtd/kernels/common.cl
+++ b/opencl_fdtd/kernels/common.cl
@@ -78,7 +78,8 @@ int p{{r}} = + (int) di{{r}};
 int wrap_{{r}} = (s{{r}} - 1) * (int) di{{r}};
 if ( {{r}} == 0 ) {
   m{{r}} = wrap_{{r}};
-} else if ( {{r}} == s{{r}} - 1 ) {
+}
+if ( {{r}} == s{{r}} - 1 ) {
   p{{r}} = -wrap_{{r}};
-} 
+}
 {% endfor %}

From fe2f4f45109a21935056de044e381792fc950001 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Sun, 14 Jul 2019 23:51:57 -0700
Subject: [PATCH 32/33] style fixes

---
 opencl_fdtd/kernels/common.cl   | 2 +-
 opencl_fdtd/kernels/update_j.cl | 4 ++--
 opencl_fdtd/simulation.py       | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/opencl_fdtd/kernels/common.cl b/opencl_fdtd/kernels/common.cl
index 8c58d48..769c248 100644
--- a/opencl_fdtd/kernels/common.cl
+++ b/opencl_fdtd/kernels/common.cl
@@ -2,7 +2,7 @@
 /* Common code for E, H updates
  *
  * Template parameters:
- *  ftype	type name (e.g. float or double)
+ *  ftype       type name (e.g. float or double)
  *  shape       list of 3 ints specifying shape of fields
  */
 #}
diff --git a/opencl_fdtd/kernels/update_j.cl b/opencl_fdtd/kernels/update_j.cl
index 4d7f64d..2682f39 100644
--- a/opencl_fdtd/kernels/update_j.cl
+++ b/opencl_fdtd/kernels/update_j.cl
@@ -1,11 +1,11 @@
 /*
- *  Update E-field from J field 
+ *  Update E-field from J field
  *
  *  Template parameters:
  *   common_header: Rendered contents of common.cl
  *
  *  OpenCL args:
- *   E, Jr, Ji, c, s, xmin, xmax, ymin, ymax, zmin, zmax   
+ *   E, Jr, Ji, c, s, xmin, xmax, ymin, ymax, zmin, zmax
  */
 
 {{common_header}}
diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 9dd90c0..03b5588 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -286,13 +286,13 @@ class Simulation(object):
             alpha_max = pml['cfs_alpha']
 
             def par(x):
-                scaling = ((x / (pml['thickness'])) ** pml['m'])
+                scaling = (x / pml['thickness']) ** pml['m']
                 sigma = scaling * sigma_max
                 kappa = 1 + scaling * (kappa_max - 1)
                 alpha = ((1 - x / pml['thickness']) ** pml['ma']) * alpha_max
                 p0 = numpy.exp(-(sigma / kappa + alpha) * self.dt)
                 p1 = sigma / (sigma + kappa * alpha) * (p0 - 1)
-                p2 = 1/kappa
+                p2 = 1 / kappa
                 return p0, p1, p2
 
             xe, xh = (numpy.arange(1, pml['thickness'] + 1, dtype=self.arg_type)[::-1] for _ in range(2))

From d5fd78d4933cb65f04d85eed7fca5e3264c934f8 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz <anewusername@gmail.com>
Date: Wed, 17 Jul 2019 00:53:55 -0700
Subject: [PATCH 33/33] Revert "Add restrict keyword to pointers (not sharing
 the same memory for multiple fields)"

This reverts commit 60b70bb332d29fb5a2aecb801517be659cf4daf9.

It appears to have minimal performance impact, and fails to compile on
nvidia cards.
---
 opencl_fdtd/simulation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py
index 03b5588..c5d0005 100644
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@@ -170,7 +170,7 @@ class Simulation(object):
         ctype = type_to_C(self.arg_type)
 
         def ptr(arg: str) -> str:
-            return ctype + ' * restrict ' + arg
+            return ctype + ' *' + arg
 
         base_fields = OrderedDict()
         base_fields[ptr('E')] = self.E