Rewrite, with the following features:

- Move to jinja2 templates for the opencl code - Combine PML code into the E, H updates for speed - Add Poynting vector calculation code, including precalculation during H update - Use arrays for PML parameters (p0, p1) - Switch to linearized, C-ordered fields (~50% performance boost??) - Added jinja2 and fdfd_tools dependencies
2016-09-01 14:39:44 -07:00 · 2016-09-01 14:39:44 -07:00 · d34c478f1d
commit d34c478f1d
parent cd72219d0b
9 changed files with 511 additions and 407 deletions
--- a/fdtd.py
+++ b/fdtd.py
@ -8,12 +8,17 @@ import sys
 import time
 import numpy
-import h5py
+import lzma 
 import dill
 from fdtd.simulation import Simulation
 from masque import Pattern, shapes
 import gridlock
 import pcgen
 import fdfd_tools
 __author__ = 'Jan Petykiewicz'
 def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern:
@ -75,7 +80,7 @@ def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern:
 def main():
    max_t = 8000            # number of timesteps
-    dx = 40                 # discretization (nm/cell)
+    dx = 25                 # discretization (nm/cell)
    pml_thickness = 8       # (number of cells)
    wl = 1550               # Excitation wavelength and fwhm
@ -114,19 +119,9 @@ def main():
                       eps=n_air**2,
                       polygons=mask.as_polygons())
-    print(grid.shape)
+    print('grid shape: {}'.format(grid.shape))
    # #### Create the simulation grid ####
-    sim = Simulation(grid.grids)
+    sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8)
    # Conducting boundaries and pmls in every direction
    c_args = []
    pml_args = []
    for d in (0, 1, 2):
        for p in (-1, 1):
            c_args += [{'direction': d, 'polarity': p}]
            pml_args += [{'direction': d, 'polarity': p, 'epsilon_eff': n_slab**2}]
    sim.init_conductors(c_args)
    sim.init_cpml(pml_args)
    # Source parameters and function
    w = 2 * numpy.pi * dx / wl
@ -137,31 +132,44 @@ def main():
    def field_source(i):
        t0 = i * sim.dt - delay
        return numpy.sin(w * t0) * numpy.exp(-alpha * t0**2)
    with open('sources.c', 'w') as f:
        f.write(sim.sources['E'])
        f.write('\n==========================================\n')
        f.write(sim.sources['H'])
        if sim.update_S:
            f.write('\n==========================================\n')
            f.write(sim.sources['S'])
    # #### Run a bunch of iterations ####
    # event = sim.whatever([prev_event]) indicates that sim.whatever should be queued immediately and run
    #  once prev_event is finished.
    output_file = h5py.File('simulation_output.h5', 'w')
    start = time.perf_counter()
    for t in range(max_t):
-        event = sim.cpml_E([])
+        sim.update_E([]).wait()
        sim.update_E([event]).wait()
-        sim.E[1][tuple(grid.shape//2)] += field_source(t)
+        ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + numpy.prod(grid.shape)
-        event = sim.conductor_E([])
+        sim.E[ind] += field_source(t)
-        event = sim.cpml_H([event])
+        e = sim.update_H([])
-        event = sim.update_H([event])
+        if sim.update_S:
-        sim.conductor_H([event]).wait()
+            e = sim.update_S([e])
        e.wait()
-        print('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start)))
+        if t % 100 == 0:
-        sys.stdout.flush()
+            print('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start)))
            sys.stdout.flush()
-        # Save field slices
+    with lzma.open('saved_simulation', 'wb') as f:
-        if (t % 20 == 0 and (max_t - t < 1000 or t < 1000)) or t == max_t-1:
+        def unvec(f):
-            print('saving E-field')
+            return fdfd_tools.unvec(f, grid.shape)
-            for j, f in enumerate(sim.E):
+        d = {
-                a = f.get()
+            'grid': grid,
-                output_file['/E{}_t{}'.format('xyz'[j], t)] = a[:, :, round(a.shape[2]/2)]
+            'E': unvec(sim.E.get()),
            'H': unvec(sim.H.get()),
            }
        if sim.S is not None:
            d['S'] = unvec(sim.S.get())
        dill.dump(d, f)
 if __name__ == '__main__':
    main()
--- a/fdtd/base.py
+++ b/fdtd/base.py
@ -1,72 +0,0 @@
 """
 Basic code snippets for opencl FDTD
 """
 from typing import List
 import numpy
 def shape_source(shape: List[int] or numpy.ndarray) -> str:
    """
    Defines sx, sy, sz C constants specifying the shape of the grid in each of the 3 dimensions.
    :param shape: [sx, sy, sz] values.
    :return: String containing C source.
    """
    sxyz = """
 // Field sizes
 const int sx = {shape[0]};
 const int sy = {shape[1]};
 const int sz = {shape[2]};
 """.format(shape=shape)
    return sxyz
 # Defines dix, diy, diz constants used for stepping in the x, y, z directions in a linear array
 #  (ie, given Ex[i] referring to position (x, y, z), Ex[i+diy] will refer to position (x, y+1, z))
 dixyz_source = """
 // Convert offset in field xyz to linear index offset
 const int dix = sz * sy;
 const int diy = sz;
 const int diz = 1;
 """
 # Given a linear index i and shape sx, sy, sz, defines x, y, and z as the 3D indices of the current element (i).
 xyz_source = """
 // Convert linear index to field index (xyz)
 const int x = i / (sz * sy);
 const int y = (i - x * sz * sy) / sz;
 const int z = (i - y * sz - x * sz * sy);
 """
 # Source code for updating the E field; maxes use of dixyz_source.
 maxwell_E_source = """
 // E update equations
 Ex[i] += dt / epsx[i] * ((Hz[i] - Hz[i-diy]) - (Hy[i] - Hy[i-diz]));
 Ey[i] += dt / epsy[i] * ((Hx[i] - Hx[i-diz]) - (Hz[i] - Hz[i-dix]));
 Ez[i] += dt / epsz[i] * ((Hy[i] - Hy[i-dix]) - (Hx[i] - Hx[i-diy]));
 """
 # Source code for updating the H field; maxes use of dixyz_source and assumes mu=0
 maxwell_H_source = """
 // H update equations
 Hx[i] -= dt * ((Ez[i+diy] - Ez[i]) - (Ey[i+diz] - Ey[i]));
 Hy[i] -= dt * ((Ex[i+diz] - Ex[i]) - (Ez[i+dix] - Ez[i]));
 Hz[i] -= dt * ((Ey[i+dix] - Ey[i]) - (Ex[i+diy] - Ex[i]));
 """
 def type_to_C(float_type: numpy.float32 or numpy.float64) -> str:
    """
    Returns a string corresponding to the C equivalent of a numpy type.
    Only works for float32 and float64.
    :param float_type: numpy.float32 or numpy.float64
    :return: string containing the corresponding C type (eg. 'double')
    """
    if float_type == numpy.float32:
        arg_type = 'float'
    elif float_type == numpy.float64:
        arg_type = 'double'
    else:
        raise Exception('Unsupported type')
    return arg_type
--- a/fdtd/boundary.py
+++ b/fdtd/boundary.py
@ -1,185 +0,0 @@
 from typing import List, Dict
 import numpy
 def conductor(direction: int,
              polarity: int,
              ) -> List[str]:
    """
    Create source code for conducting boundary conditions.
    :param direction: integer in range(3), corresponding to x,y,z.
    :param polarity: -1 or 1, specifying eg. a -x or +x boundary.
    :return: [E_source, H_source] source code for E and H boundary update steps.
    """
    if direction not in range(3):
        raise Exception('Invalid direction: {}'.format(direction))
    if polarity not in (-1, 1):
        raise Exception('Invalid polarity: {}'.format(polarity))
    r = 'xyz'[direction]
    uv = 'xyz'.replace(r, '')
    if polarity < 0:
        bc_e = """
 if ({r} == 0) {{
    E{r}[i] = 0;
    E{u}[i] = E{u}[i+di{r}];
    E{v}[i] = E{v}[i+di{r}];
 }}
 """
        bc_h = """
 if ({r} == 0) {{
    H{r}[i] = H{r}[i+di{r}];
    H{u}[i] = 0;
    H{v}[i] = 0;
 }}
 """
    elif polarity > 0:
        bc_e = """
 if ({r} == s{r} - 1) {{
    E{r}[i] = -E{r}[i-2*di{r}];
    E{u}[i] = +E{u}[i-di{r}];
    E{v}[i] = +E{v}[i-di{r}];
 }} else if ({r} == s{r} - 2) {{
    E{r}[i] = 0;
 }}
 """
        bc_h = """
 if ({r} == s{r} - 1) {{
    H{r}[i] = +H{r}[i-di{r}];
    H{u}[i] = -H{u}[i-2*di{r}];
    H{v}[i] = -H{v}[i-2*di{r}];
 }} else if ({r} == s{r} - 2) {{
    H{u}[i] = 0;
    H{v}[i] = 0;
 }}
 """
    else:
        raise Exception()
    replacements = {'r': r, 'u': uv[0], 'v': uv[1]}
    return [s.format(**replacements) for s in (bc_e, bc_h)]
 def cpml(direction: int,
         polarity: int,
         dt: float,
         thickness: int=8,
         epsilon_eff: float=1,
         ) -> Dict:
    """
    Generate source code for complex phase matched layer (cpml) absorbing boundaries.
    These are not full boundary conditions and require a conducting boundary to be added
     in the same direction as the pml.
    :param direction: integer in range(3), corresponding to x, y, z directions.
    :param polarity: -1 or 1, corresponding to eg. -x or +x direction.
    :param dt: timestep used by the simulation
    :param thickness: Number of cells used by the pml (the grid is NOT expanded to add these cells). Default 8.
    :param epsilon_eff: Effective epsilon_r of the pml layer. Default 1.
    :return: Dict with entries 'E', 'H' (update equations for E and H) and 'psi_E', 'psi_H' (lists of str,
            specifying the field names of the cpml fields used in the E and H update steps. Eg.,
            Psi_xn_Ex for the complex Ex component for the x- pml.)
    """
    if direction not in range(3):
        raise Exception('Invalid direction: {}'.format(direction))
    if polarity not in (-1, 1):
        raise Exception('Invalid polarity: {}'.format(polarity))
    if thickness <= 2:
        raise Exception('It would be wise to have a pml with 4+ cells of thickness')
    if epsilon_eff <= 0:
        raise Exception('epsilon_eff must be positive')
    m = (3.5, 1)
    sigma_max = 0.8 * (m[0] + 1) / numpy.sqrt(epsilon_eff)
    alpha_max = 0  # TODO: Decide what to do about non-zero alpha
    transverse = numpy.delete(range(3), direction)
    r = 'xyz'[direction]
    np = 'nVp'[numpy.sign(polarity)+1]
    uv = ['xyz'[i] for i in transverse]
    xe = numpy.arange(1, thickness+1, dtype=float)[::-1]
    xh = numpy.arange(1, thickness+1, dtype=float)[::-1]
    if polarity > 0:
        xe -= 0.5
    elif polarity < 0:
        xh -= 0.5
    def par(x):
        sigma = ((x / thickness) ** m[0]) * sigma_max
        alpha = ((1 - x / thickness) ** m[1]) * alpha_max
        p0 = numpy.exp(-(sigma + alpha) * dt)
        p1 = sigma / (sigma + alpha) * (p0 - 1)
        return p0, p1
    p0e, p1e = par(xe)
    p0h, p1h = par(xh)
    vals = {'r': r,
            'u': uv[0],
            'v': uv[1],
            'np':   np,
            'th':   thickness,
            'p0e': ', '.join((str(x) for x in p0e)),
            'p1e': ', '.join((str(x) for x in p1e)),
            'p0h': ', '.join((str(x) for x in p0h)),
            'p1h': ', '.join((str(x) for x in p1h)),
            'se': '-+'[direction % 2],
            'sh': '+-'[direction % 2]}
    if polarity < 0:
        bounds_if = """
 if ( 0 < {r} && {r} < {th} + 1 ) {{
    const int ir = {r} - 1;                             // index into pml parameters
    const int ip = {v} + {u} * s{v} + ir * s{v} * s{u};   // linear index into Psi
 """
    elif polarity > 0:
        bounds_if = """
 if ( (s{r} - 1) > {r} && {r} > (s{r} - 1) - ({th} + 1) ) {{
    const int ir = (s{r} - 1) - ({r} + 1);              // index into pml parameters
    const int ip = {v} + {u} * s{v} + ir * s{v} * s{u};   // linear index into Psi
 """
    else:
        raise Exception('Bad polarity (=0)')
    code_e = """
    // pml parameters:
    const float p0[{th}] = {{ {p0e} }};
    const float p1[{th}] = {{ {p1e} }};
    Psi_{r}{np}_E{u}[ip] = p0[ir] * Psi_{r}{np}_E{u}[ip] + p1[ir] * (H{v}[i] - H{v}[i-di{r}]);
    Psi_{r}{np}_E{v}[ip] = p0[ir] * Psi_{r}{np}_E{v}[ip] + p1[ir] * (H{u}[i] - H{u}[i-di{r}]);
    E{u}[i] {se}= dt / eps{u}[i] * Psi_{r}{np}_E{u}[ip];
    E{v}[i] {sh}= dt / eps{v}[i] * Psi_{r}{np}_E{v}[ip];
 }}
 """
    code_h = """
    // pml parameters:
    const float p0[{th}] = {{ {p0h} }};
    const float p1[{th}] = {{ {p1h} }};
    Psi_{r}{np}_H{u}[ip] = p0[ir] * Psi_{r}{np}_H{u}[ip] + p1[ir] * (E{v}[i+di{r}] - E{v}[i]);
    Psi_{r}{np}_H{v}[ip] = p0[ir] * Psi_{r}{np}_H{v}[ip] + p1[ir] * (E{u}[i+di{r}] - E{u}[i]);
    H{u}[i] {sh}= dt * Psi_{r}{np}_H{u}[ip];
    H{v}[i] {se}= dt * Psi_{r}{np}_H{v}[ip];
 }}
 """
    pml_data = {
        'E': (bounds_if + code_e).format(**vals),
        'H': (bounds_if + code_h).format(**vals),
        'psi_E': ['Psi_{r}{np}_E{u}'.format(**vals),
                  'Psi_{r}{np}_E{v}'.format(**vals)],
        'psi_H': ['Psi_{r}{np}_H{u}'.format(**vals),
                  'Psi_{r}{np}_H{v}'.format(**vals)],
    }
    return pml_data
--- a/fdtd/kernels/common.cl
+++ b/fdtd/kernels/common.cl
@ -0,0 +1,84 @@
 {#
 /* Common code for E, H updates
 *
 * Template parameters:
 *  ftype	type name (e.g. float or double)
 *  shape       list of 3 ints specifying shape of fields
 */
 #}
 typedef {{ftype}} ftype;
 /*
 * Field size info
 */
 const size_t sx = {{shape[0]}};
 const size_t sy = {{shape[1]}};
 const size_t sz = {{shape[2]}};
 const size_t field_size = sx * sy * sz;
 //Since we use i to index into Ex[], Ey[], ... rather than E[], do nothing if
 // i is outside the bounds of Ex[].
 if (i >= field_size) {
    PYOPENCL_ELWISE_CONTINUE;
 }
 /*
 * Array indexing
 */
 // Given a linear index i and shape (sx, sy, sz), defines x, y, and z
 //  as the 3D indices of the current element (i).
 // (ie, converts linear index [i] to field indices (x, y, z)
 const size_t x = i / (sz * sy);
 const size_t y = (i - x * sz * sy) / sz;
 const size_t z = (i - y * sz - x * sz * sy);
 // Calculate linear index offsets corresponding to offsets in 3D
 // (ie, if E[i] <-> E(x, y, z), then E[i + diy] <-> E(x, y + 1, z)
 const size_t dix = sz * sy;
 const size_t diy = sz;
 const size_t diz = 1;
 /*
 * Pointer math
 */
 //Pointer offsets into the components of a linearized vector-field
 // (eg. Hx = H + XX, where H and Hx are pointers)
 const size_t XX = 0;
 const size_t YY = field_size;
 const size_t ZZ = field_size * 2;
 //Define pointers to vector components of each field (eg. Hx = H + XX)
 __global ftype *Ex = E + XX;
 __global ftype *Ey = E + YY;
 __global ftype *Ez = E + ZZ;
 __global ftype *Hx = H + XX;
 __global ftype *Hy = H + YY;
 __global ftype *Hz = H + ZZ;
 /*
 * Implement periodic boundary conditions
 *
 * mx ([m]inus [x]) gives the index offset of the adjacent cell in the minus-x direction.
 * In the event that we start at x == 0, we actually want to wrap around and grab the cell
 * x_{-1} == (sx - 1) instead, ie. mx = (sx - 1) * dix .
 *
 * px ([p]lus [x]) gives the index offset of the adjacent cell in the plus-x direction.
 * In the event that we start at x == (sx - 1), we actually want to wrap around and grab
 * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix .
 */
 {% for r in 'xyz' %}
 int m{{r}} = -di{{r}};
 int p{{r}} = +di{{r}};
 int wrap_{{r}} = (s{{r}} - 1) * di{{r}};
 if ( {{r}} == 0 ) {
  m{{r}} = wrap_{{r}};
 } else if ( {{r}} == s{{r}} - 1 ) {
  p{{r}} = -wrap_{{r}};
 } 
 {% endfor %}
--- a/fdtd/kernels/update_e.cl
+++ b/fdtd/kernels/update_e.cl
@ -0,0 +1,78 @@
 /*
 *  Update E-field, including any PMLs.
 *  
 *  Template parameters:
 *   common_header: Rendered contents of common.cl
 *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
 *   pml_thickness: Number of cells (integer)
 *   
 *  OpenCL args:
 *   E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E]
 */
 {{common_header}}
 ////////////////////////////////////////////////////////////////////////////
 __global ftype *epsx = eps + XX;
 __global ftype *epsy = eps + YY;
 __global ftype *epsz = eps + ZZ;
 {% if pmls -%}
 const int pml_thickness = {{pml_thickness}};
 {%- endif %}
 /*
 *   Precalclate derivatives
 */
 ftype dHxy = Hx[i] - Hx[i + my];
 ftype dHxz = Hx[i] - Hx[i + mz];
 ftype dHyx = Hy[i] - Hy[i + mx];
 ftype dHyz = Hy[i] - Hy[i + mz];
 ftype dHzx = Hz[i] - Hz[i + mx];
 ftype dHzy = Hz[i] - Hz[i + my];
 /*
 *   PML Update
 */
 // PML effects on E
 ftype pExi = 0;
 ftype pEyi = 0;
 ftype pEzi = 0;
 {% for r, p in pmls -%}
    {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
    {%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%}
    {%- if r != 'y' -%}
        {%- set se, sh = '-', '+' -%}
    {%- else -%}
        {%- set se, sh = '+', '-' -%}
    {%- endif -%}
    {%- if p == 'n' %}
 if ( {{r}} < pml_thickness ) {
    const size_t ir = {{r}};                          // index into pml parameters
    {%- elif p == 'p' %}
 if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
    const size_t ir = (s{{r}} - 1) - {{r}};                     // index into pml parameters
    {%- endif %}
    const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
    {{psi ~ u}}[ip] = p0e{{p}}[ir] * {{psi ~ u}}[ip] + p1e{{p}}[ir] * dH{{v ~ r}};
    {{psi ~ v}}[ip] = p0e{{p}}[ir] * {{psi ~ v}}[ip] + p1e{{p}}[ir] * dH{{u ~ r}};
    pE{{u}}i {{se}}= {{psi ~ u}}[ip];
    pE{{v}}i {{sh}}= {{psi ~ v}}[ip];
 }
 {%- endfor %}
 /*
 *  Update E
 */
 Ex[i] += dt / epsx[i] * (dHzy - dHyz + pExi);
 Ey[i] += dt / epsy[i] * (dHxz - dHzx + pEyi);
 Ez[i] += dt / epsz[i] * (dHyx - dHxy + pEzi);
--- a/fdtd/kernels/update_h.cl
+++ b/fdtd/kernels/update_h.cl
@ -0,0 +1,125 @@
 /*
 *  Update H-field, including any PMLs.
 *   Also precalculate values for poynting vector if necessary.
 *  
 *  Template parameters:
 *   common_header: Rendered contents of common.cl
 *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
 *   pml_thickness: Number of cells (integer)
 *   do_poynting: Whether to precalculate poynting vector components (boolean)  
 *
 *  OpenCL args:
 *   E, H, dt, [p{01}h{np}, Psi_{xyz}{np}_H], [oS]
 */
 {{common_header}}
 ////////////////////////////////////////////////////////////////////////////
 {% if pmls -%}
 const int pml_thickness = {{pml_thickness}};
 {%- endif %}
 /*
 *  Precalculate derivatives
 */
 ftype dExy = Ex[i + py] - Ex[i];
 ftype dExz = Ex[i + pz] - Ex[i];
 ftype dEyx = Ey[i + px] - Ey[i];
 ftype dEyz = Ey[i + pz] - Ey[i];
 ftype dEzx = Ez[i + px] - Ez[i];
 ftype dEzy = Ez[i + py] - Ez[i];
 {%- if do_poynting %}
 /*
 *  Precalculate averaged E
 */
 ftype aExy = Ex[i + py] + Ex[i];
 ftype aExz = Ex[i + pz] + Ex[i];
 ftype aEyx = Ey[i + px] + Ey[i];
 ftype aEyz = Ey[i + pz] + Ey[i];
 ftype aEzx = Ez[i + px] + Ez[i];
 ftype aEzy = Ez[i + py] + Ez[i];
 {%- endif %}
 /*
 *  PML Update
 */
 // PML contributions to H
 ftype pHxi = 0;
 ftype pHyi = 0;
 ftype pHzi = 0;
 {%- for r, p in pmls -%}
    {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
    {%- set psi = 'Psi_' ~ r ~ p ~ '_H' -%}
    {%- if r != 'y' -%}
        {%- set se, sh = '-', '+' -%}
    {%- else -%}
        {%- set se, sh = '+', '-' -%}
    {%- endif -%}
    {%- if p == 'n' %}
 if ( {{r}} < pml_thickness ) {
    const size_t ir = {{r}};                          // index into pml parameters
    {%- elif p == 'p' %}
 if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
    const size_t ir = (s{{r}} - 1) - {{r}};                     // index into pml parameters
    {%- endif %}
    const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
    {{psi ~ u}}[ip] = p0h{{p}}[ir] * {{psi ~ u}}[ip] + p1h{{p}}[ir] * dE{{v ~ r}};
    {{psi ~ v}}[ip] = p0h{{p}}[ir] * {{psi ~ v}}[ip] + p1h{{p}}[ir] * dE{{u ~ r}};
    pH{{u}}i {{sh}}= {{psi ~ u}}[ip];
    pH{{v}}i {{se}}= {{psi ~ v}}[ip];
 }
 {%- endfor %}
 /*
 *  Update H
 */
 {% if do_poynting -%}
 // Save old H for averaging
 ftype Hx_old = Hx[i];
 ftype Hy_old = Hy[i];
 ftype Hz_old = Hz[i];
 {%- endif %}
 // H update equations
 Hx[i] -= dt * (dEzy - dEyz + pHxi);
 Hy[i] -= dt * (dExz - dEzx + pHyi);
 Hz[i] -= dt * (dEyx - dExy + pHzi);
 {% if do_poynting -%}
 // Average H across timesteps
 ftype aHxt = Hx[i] + Hx_old;
 ftype aHyt = Hy[i] + Hy_old;
 ftype aHzt = Hz[i] + Hz_old;
 /*
 *  Calculate unscaled S components at H locations
 */
 __global ftype *oSxy = oS + 0 * field_size;
 __global ftype *oSyz = oS + 1 * field_size;
 __global ftype *oSzx = oS + 2 * field_size;
 __global ftype *oSxz = oS + 3 * field_size;
 __global ftype *oSyx = oS + 4 * field_size;
 __global ftype *oSzy = oS + 5 * field_size;
 oSxy[i] = aEyx * aHzt;
 oSxz[i] = -aEzx * aHyt;
 oSyz[i] = aEzy * aHxt;
 oSyx[i] = -aExy * aHzt;
 oSzx[i] = aExz * aHyt;
 oSzy[i] = -aEyz * aHxt;
 {%- endif -%}
--- a/fdtd/kernels/update_s.cl
+++ b/fdtd/kernels/update_s.cl
@ -0,0 +1,36 @@
 /*
 *  Update E-field, including any PMLs.
 *  
 *  Template parameters:
 *   common_header: Rendered contents of common.cl
 *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
 *   pml_thickness: Number of cells (integer)
 *   
 *  OpenCL args:
 *   E, H, dt, S, oS
 */
 {{common_header}}
 //////////////////////////////////////////////////////////////////////
 /*
 * Calculate S from oS (pre-calculated components)
 */ 
 __global ftype *Sx = S + XX;
 __global ftype *Sy = S + YY;
 __global ftype *Sz = S + ZZ;
 // Use unscaled S components from H locations 
 __global ftype *oSxy = oS + 0 * field_size;
 __global ftype *oSyz = oS + 1 * field_size;
 __global ftype *oSzx = oS + 2 * field_size;
 __global ftype *oSxz = oS + 3 * field_size;
 __global ftype *oSyx = oS + 4 * field_size;
 __global ftype *oSzy = oS + 5 * field_size;
 ftype s_factor = dt * 0.125;
 Sx[i] = (oSxy[i] + oSxz[i] + oSxy[i + my] + oSxz[i + mz]) * s_factor;
 Sy[i] = (oSyz[i] + oSyx[i] + oSyz[i + mz] + oSyx[i + mx]) * s_factor;
 Sz[i] = (oSzx[i] + oSzy[i] + oSzx[i + mx] + oSzy[i + my]) * s_factor;
--- a/fdtd/simulation.py
+++ b/fdtd/simulation.py
@ -3,15 +3,23 @@ Class for constructing and holding the basic FDTD operations and fields
 """
 from typing import List, Dict, Callable
 from collections import OrderedDict
 import numpy
 import jinja2
 import warnings
 import pyopencl
 import pyopencl.array
 from pyopencl.elementwise import ElementwiseKernel
-from . import boundary, base
+from fdfd_tools import vec
-from .base import type_to_C
+
 __author__ = 'Jan Petykiewicz'
 # Create jinja2 env on module load
 jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
 class Simulation(object):
@ -20,6 +28,7 @@ class Simulation(object):
    """
    E = None    # type: List[pyopencl.array.Array]
    H = None    # type: List[pyopencl.array.Array]
    S = None    # type: List[pyopencl.array.Array]
    eps = None  # type: List[pyopencl.array.Array]
    dt = None   # type: float
@ -30,24 +39,20 @@ class Simulation(object):
    update_E = None     # type: Callable[[],pyopencl.Event]
    update_H = None     # type: Callable[[],pyopencl.Event]
-
+    update_S = None     # type: Callable[[],pyopencl.Event]
-    conductor_E = None  # type: Callable[[],pyopencl.Event]
+    sources = None      # type: Dict[str, str]
    conductor_H = None  # type: Callable[[],pyopencl.Event]
    cpml_E = None       # type: Callable[[],pyopencl.Event]
    cpml_H = None       # type: Callable[[],pyopencl.Event]
    cpml_psi_E = None   # type: Dict[str, pyopencl.array.Array]
    cpml_psi_H = None   # type: Dict[str, pyopencl.array.Array]
    def __init__(self,
                 epsilon: List[numpy.ndarray],
-                 dt: float=.99/numpy.sqrt(3),
+                 dt: float = .99/numpy.sqrt(3),
-                 initial_E: List[numpy.ndarray]=None,
+                 initial_E: List[numpy.ndarray] = None,
-                 initial_H: List[numpy.ndarray]=None,
+                 initial_H: List[numpy.ndarray] = None,
-                 context: pyopencl.Context=None,
+                 context: pyopencl.Context = None,
-                 queue: pyopencl.CommandQueue=None,
+                 queue: pyopencl.CommandQueue = None,
-                 float_type: numpy.float32 or numpy.float64=numpy.float32):
+                 float_type: numpy.float32 or numpy.float64 = numpy.float32,
                 pml_thickness: int = 10,
                 pmls: List[List[str]] = None,
                 do_poynting: bool = True):
        """
        Initialize the simulation.
@ -67,7 +72,7 @@ class Simulation(object):
            Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
        if context is None:
-            self.context = pyopencl.create_some_context(False)
+            self.context = pyopencl.create_some_context()
        else:
            self.context = context
@ -84,126 +89,149 @@ class Simulation(object):
            self.dt = dt
        self.arg_type = float_type
-
+        self.sources = {}
-        self.eps = [pyopencl.array.to_device(self.queue, e.astype(float_type)) for e in epsilon]
+        self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(float_type))
        if initial_E is None:
-            self.E = [pyopencl.array.zeros_like(self.eps[0]) for _ in range(3)]
+            self.E = pyopencl.array.zeros_like(self.eps)
        else:
            if len(initial_E) != 3:
                Exception('Initial_E must be a list of length 3')
            if not all((E.shape == epsilon[0].shape for E in initial_E)):
                Exception('Initial_E list elements must have same shape as epsilon elements')
-            self.E = [pyopencl.array.to_device(self.queue, E.astype(float_type)) for E in initial_E]
+            self.E = pyopencl.array.to_device(self.queue, vec(E).astype(float_type))
        if initial_H is None:
-            self.H = [pyopencl.array.zeros_like(self.eps[0]) for _ in range(3)]
+            self.H = pyopencl.array.zeros_like(self.eps)
        else:
            if len(initial_H) != 3:
                Exception('Initial_H must be a list of length 3')
            if not all((H.shape == epsilon[0].shape for H in initial_H)):
                Exception('Initial_H list elements must have same shape as epsilon elements')
-            self.H = [pyopencl.array.to_device(self.queue, H.astype(float_type)) for H in initial_H]
+            self.H = pyopencl.array.to_device(self.queue, vec(H).astype(float_type))
        if pmls is None:
            pmls = [[d, p] for d in 'xyz' for p in 'np']
        ctype = type_to_C(self.arg_type)
-        E_args = [ctype + ' *E' + c for c in 'xyz']
+        def ptr(arg: str) -> str:
-        H_args = [ctype + ' *H' + c for c in 'xyz']
+            return ctype + ' *' + arg
        eps_args = [ctype + ' *eps' + c for c in 'xyz']
        dt_arg = [ctype + ' dt']
-        sxyz = base.shape_source(epsilon[0].shape)
+        base_fields = OrderedDict()
-        E_source = sxyz + base.dixyz_source + base.maxwell_E_source
+        base_fields[ptr('E')] = self.E
-        H_source = sxyz + base.dixyz_source + base.maxwell_H_source
+        base_fields[ptr('H')] = self.H
        base_fields[ctype + ' dt'] = self.dt
        eps_field = OrderedDict()
        eps_field[ptr('eps')] = self.eps
        common_source = jinja_env.get_template('common.cl').render(
                ftype=ctype,
                shape=epsilon[0].shape,
                )
        jinja_args = {
                'common_header': common_source,
                'pml_thickness': pml_thickness,
                'pmls': pmls,
                'do_poynting': do_poynting,
                }
        E_source = jinja_env.get_template('update_e.cl').render(**jinja_args)
        H_source = jinja_env.get_template('update_h.cl').render(**jinja_args)
        self.sources['E'] = E_source
        self.sources['H'] = H_source
        if do_poynting:
            S_source = jinja_env.get_template('update_s.cl').render(**jinja_args)
            self.sources['S'] = S_source
            self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=float_type)
            self.S = pyopencl.array.zeros_like(self.E)
            S_fields = OrderedDict()
            S_fields[ptr('oS')] = self.oS
            S_fields[ptr('S')] = self.S
        else:
            S_fields = OrderedDict() 
        '''
        PML
        '''
        m = (3.5, 1)
        sigma_max = 0.8 * (m[0] + 1) / numpy.sqrt(1.0) # TODO: epsilon_eff (not 1.0)
        alpha_max = 0  # TODO: Decide what to do about non-zero alpha
        def par(x):
            sigma = ((x / pml_thickness) ** m[0]) * sigma_max
            alpha = ((1 - x / pml_thickness) ** m[1]) * alpha_max
            p0 = numpy.exp(-(sigma + alpha) * dt)
            p1 = sigma / (sigma + alpha) * (p0 - 1)
            return p0, p1
        xen, xep, xhn, xhp = (numpy.arange(1, pml_thickness + 1, dtype=float_type)[::-1] for _ in range(4))
        xep -= 0.5
        xhn -= 0.5
        pml_p_names = [['p' + a + eh + np for np in 'np' for a in '01'] for eh in 'eh']
        pml_e_fields = OrderedDict()
        pml_h_fields = OrderedDict()
        for ne, nh, pe, ph in zip(*pml_p_names, par(xen) + par(xep), par(xhn) + par(xhp)):
            pml_e_fields[ptr(ne)] = pyopencl.array.to_device(self.queue, pe)
            pml_h_fields[ptr(nh)] = pyopencl.array.to_device(self.queue, ph)
        for pml in pmls:
            uv = 'xyz'.replace(pml[0], '')
            psi_base = 'Psi_' + ''.join(pml) + '_'
            psi_names = [[psi_base + eh + c for c in uv] for eh in 'EH']
            psi_shape = list(epsilon[0].shape)
            psi_shape['xyz'.find(pml[0])] = pml_thickness
            for ne, nh in zip(*psi_names):
                pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
                pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
        self.pml_e_fields = pml_e_fields
        self.pml_h_fields = pml_h_fields
        '''
        Create operations
        '''
        E_args = OrderedDict()
        [E_args.update(d) for d in (base_fields, eps_field, pml_e_fields)]
        E_update = ElementwiseKernel(self.context, operation=E_source,
-                                     arguments=', '.join(E_args + H_args + dt_arg + eps_args))
+                                     arguments=', '.join(E_args.keys()))
        H_args = OrderedDict()
        [H_args.update(d) for d in (base_fields, pml_h_fields, S_fields)]
        H_update = ElementwiseKernel(self.context, operation=H_source,
-                                     arguments=', '.join(E_args + H_args + dt_arg))
+                                     arguments=', '.join(H_args.keys()))
        self.update_E = lambda e: E_update(*E_args.values(), wait_for=e)
        self.update_H = lambda e: H_update(*H_args.values(), wait_for=e)
-        self.update_E = lambda e: E_update(*self.E, *self.H, self.dt, *self.eps, wait_for=e)
+        if do_poynting:
-        self.update_H = lambda e: H_update(*self.E, *self.H, self.dt, wait_for=e)
+            S_args = OrderedDict()
            [S_args.update(d) for d in (base_fields, S_fields)]
            S_update = ElementwiseKernel(self.context, operation=S_source,
                                         arguments=', '.join(S_args.keys()))
-    def init_cpml(self, pml_args: List[Dict]):
+            self.update_S = lambda e: S_update(*S_args.values(), wait_for=e)
        """
        Initialize absorbing layers (cpml: complex phase matched layer). PMLs are not actual
         boundary conditions, so you should add a conducting boundary (.init_conductors()) for
         all directions in which you add PMLs.
        Allows use of self.cpml_E(events) and self.cpml_H(events).
        All necessary additional fields are created on the opencl device.
        :param pml_args: A list containing dictionaries which are passed to .boundary.cpml(...).
            The dt argument is set automatically, but the others must be passed in each entry
             of pml_args.
        """
        sxyz = base.shape_source(self.eps[0].shape)
-        # Prepare per-iteration constants for later use
+def type_to_C(float_type) -> str:
-        pml_E_source = sxyz + base.dixyz_source + base.xyz_source
+    """
-        pml_H_source = sxyz + base.dixyz_source + base.xyz_source
+    Returns a string corresponding to the C equivalent of a numpy type.
    Only works for float16, float32, float64.
-        psi_E = []
+    :param float_type: e.g. numpy.float32
-        psi_H = []
+    :return: string containing the corresponding C type (eg. 'double')
-        psi_E_names = []
+    """
-        psi_H_names = []
+    if float_type == numpy.float16:
-        for arg_set in pml_args:
+        arg_type = 'half'
-            pml_data = boundary.cpml(dt=self.dt, **arg_set)
+    elif float_type == numpy.float32:
-
+        arg_type = 'float'
-            pml_E_source += pml_data['E']
+    elif float_type == numpy.float64:
-            pml_H_source += pml_data['H']
+        arg_type = 'double'
-
+    else:
-            ti = numpy.delete(range(3), arg_set['direction'])
+        raise Exception('Unsupported type')
-            trans = [self.eps[0].shape[i] for i in ti]
+    return arg_type
            psi_shape = (8, trans[0], trans[1])
            psi_E += [pyopencl.array.zeros(self.queue, psi_shape, dtype=self.arg_type)
                      for _ in pml_data['psi_E']]
            psi_H += [pyopencl.array.zeros(self.queue, psi_shape, dtype=self.arg_type)
                      for _ in pml_data['psi_H']]
            psi_E_names += pml_data['psi_E']
            psi_H_names += pml_data['psi_H']
        ctype = type_to_C(self.arg_type)
        E_args = [ctype + ' *E' + c for c in 'xyz']
        H_args = [ctype + ' *H' + c for c in 'xyz']
        eps_args = [ctype + ' *eps' + c for c in 'xyz']
        dt_arg = [ctype + ' dt']
        arglist_E = [ctype + ' *' + psi for psi in psi_E_names]
        arglist_H = [ctype + ' *' + psi for psi in psi_H_names]
        pml_E_args = ', '.join(E_args + H_args + dt_arg + eps_args + arglist_E)
        pml_H_args = ', '.join(E_args + H_args + dt_arg + arglist_H)
        pml_E = ElementwiseKernel(self.context, arguments=pml_E_args, operation=pml_E_source)
        pml_H = ElementwiseKernel(self.context, arguments=pml_H_args, operation=pml_H_source)
        self.cpml_E = lambda e: pml_E(*self.E, *self.H, self.dt, *self.eps, *psi_E, wait_for=e)
        self.cpml_H = lambda e: pml_H(*self.E, *self.H, self.dt, *psi_H, wait_for=e)
        self.cmpl_psi_E = {k: v for k, v in zip(psi_E_names, psi_E)}
        self.cmpl_psi_H = {k: v for k, v in zip(psi_H_names, psi_H)}
    def init_conductors(self, conductor_args: List[Dict]):
        """
        Initialize reflecting boundary conditions.
        Allows use of self.conductor_E(events) and self.conductor_H(events).
        :param conductor_args: List of dictionaries with which to call .boundary.conductor(...).
        """
        sxyz = base.shape_source(self.eps[0].shape)
        # Prepare per-iteration constants
        bc_E_source = sxyz + base.dixyz_source + base.xyz_source
        bc_H_source = sxyz + base.dixyz_source + base.xyz_source
        for arg_set in conductor_args:
            [e, h] = boundary.conductor(**arg_set)
            bc_E_source += e
            bc_H_source += h
        E_args = [type_to_C(self.arg_type) + ' *E' + c for c in 'xyz']
        H_args = [type_to_C(self.arg_type) + ' *H' + c for c in 'xyz']
        bc_E = ElementwiseKernel(self.context, arguments=E_args, operation=bc_E_source)
        bc_H = ElementwiseKernel(self.context, arguments=H_args, operation=bc_H_source)
        self.conductor_E = lambda e: bc_E(*self.E, wait_for=e)
        self.conductor_H = lambda e: bc_H(*self.H, wait_for=e)
--- a/requirements.txt
+++ b/requirements.txt
@ -1,6 +1,8 @@
 numpy
 h5py
 pyopencl
 jinja2
 -e git+https://mpxd.net/gogs/jan/float_raster.git@release#egg=float_raster
 -e git+https://mpxd.net/gogs/jan/gridlock.git@release#egg=gridlock
 -e git+https://mpxd.net/gogs/jan/masque.git@release#egg=masque
 -e git+https://mpxd.net/gogs/jan/fdfd_tools.git@master#egg=fdfd_tools