Initial commit

2016-03-30 15:00:00 -07:00 · 2016-03-30 15:00:00 -07:00 · 66d05ca830
commit 66d05ca830
13 changed files with 880 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,3 @@
+.idea/
+.pyc
+
--- a/README.md
+++ b/README.md
@ -0,0 +1,34 @@
+# opencl-fdtd
+
+**opencl-fdtd** is a python package for running 3D time-domain electromagnetic
+simulations on parallel compute hardware (mainly GPUs).
+
+**Performance** highly depends on what hardware you have available:
+* A 395x345x73 cell simulation (~10 million points, 8-cell absorbing boundaries)
+ runs at around 42 iterations/sec. on my Nvidia GTX 580.
+* On my laptop (Nvidia 940M) the same simulation achieves ~8 iterations/sec.
+* An L3 photonic crystal cavity ringdown simulation (1550nm source, 40nm
+ discretization, 8000 steps) takes about 5 minutes on my laptop.
+
+**Capabilities** are currently pretty minimal:
+* Absorbing boundaries (CPML)
+* Conducting boundaries (PMC)
+* Anisotropic media (eps_xx, eps_yy, eps_zz)
+* Direct access to fields (eg., you can trivially add a soft or hard
+ current source with just sim.E[1] += sin(f0 * t), or save any portion
+ of a field to a file)
+
+## Installation
+
+**Requirements:**
+* python 3 (written and tested with 3.5)
+* numpy
+* pyopencl
+* h5py (for file output)
+* [gridlock](https://mpxd.net/gogs/jan/gridlock)
+* [masque](https://mpxd.net/gogs/jan/masque)
+
+You can install the requirements and their dependencies easily with
+```bash
+pip intall -r requirements.txt
+```
--- a/fdtd.py
+++ b/fdtd.py
@ -0,0 +1,164 @@
+"""
+Example code for running an OpenCL FDTD simulation
+
+See main() for simulation setup.
+"""
+
+import sys
+import time
+
+import numpy
+import h5py
+
+from fdtd.simulation import Simulation
+from masque import Pattern, shapes
+import gridlock
+import pcgen
+
+
+def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern:
+    """
+    Generate a masque.Pattern object containing a perturbed L3 cavity.
+
+    :param a: Lattice constant.
+    :param radius: Hole radius, in units of a (lattice constant).
+    :param kwargs: Keyword arguments:
+        hole_dose, trench_dose, hole_layer, trench_layer: Shape properties for Pattern.
+                Defaults *_dose=1, hole_layer=0, trench_layer=1.
+        shifts_a, shifts_r: passed to pcgen.l3_shift; specifies lattice constant (1 -
+                multiplicative factor) and radius (multiplicative factor) for shifting
+                holes adjacent to the defect (same row). Defaults are 0.15 shift for
+                first hole, 0.075 shift for third hole, and no radius change.
+        xy_size: [x, y] number of mirror periods in each direction; total size is
+                2 * n + 1 holes in each direction. Default [10, 10].
+        perturbed_radius: radius of holes perturbed to form an upwards-driected beam
+                (multiplicative factor). Default 1.1.
+        trench width: Width of the undercut trenches. Default 1.2e3.
+    :return: masque.Pattern object containing the L3 design
+    """
+
+    default_args = {'hole_dose':    1,
+                    'trench_dose':  1,
+                    'hole_layer':   0,
+                    'trench_layer': 1,
+                    'shifts_a':     (0.15, 0, 0.075),
+                    'shifts_r':     (1.0, 1.0, 1.0),
+                    'xy_size':      (10, 10),
+                    'perturbed_radius': 1.1,
+                    'trench_width': 1.2e3,
+                    }
+    kwargs = {**default_args, **kwargs}
+
+    xyr = pcgen.l3_shift_perturbed_defect(mirror_dims=kwargs['xy_size'],
+                                          perturbed_radius=kwargs['perturbed_radius'],
+                                          shifts_a=kwargs['shifts_a'],
+                                          shifts_r=kwargs['shifts_r'])
+    xyr *= a
+    xyr[:, 2] *= radius
+
+    pat = Pattern()
+    pat.name = 'L3p-a{:g}r{:g}rp{:g}'.format(a, radius, kwargs['perturbed_radius'])
+    pat.shapes += [shapes.Circle(radius=r, offset=(x, y),
+                                 dose=kwargs['hole_dose'],
+                                 layer=kwargs['hole_layer'])
+                   for x, y, r in xyr]
+
+    maxes = numpy.max(numpy.fabs(xyr), axis=0)
+    pat.shapes += [shapes.Polygon.rectangle(
+        lx=(2 * maxes[0]), ly=kwargs['trench_width'],
+        offset=(0, s * (maxes[1] + a + kwargs['trench_width'] / 2)),
+        dose=kwargs['trench_dose'], layer=kwargs['trench_layer'])
+                   for s in (-1, 1)]
+    return pat
+
+
+def main():
+    max_t = 8000            # number of timesteps
+
+    dx = 40                 # discretization (nm/cell)
+    pml_thickness = 8       # (number of cells)
+
+    wl = 1550               # Excitation wavelength and fwhm
+    dwl = 200
+
+    # Device design parameters
+    xy_size = numpy.array([10, 10])
+    a = 430
+    r = 0.285
+    th = 170
+
+    # refractive indices
+    n_slab = 3.408  # InGaAsP(80, 50) @ 1550nm
+    n_air = 1.0   # air
+
+    # Half-dimensions of the simulation grid
+    xy_max = (xy_size + 1) * a * [1, numpy.sqrt(3)/2]
+    z_max = 1.6 * a
+    xyz_max = numpy.hstack((xy_max, z_max)) + pml_thickness * dx
+
+    # Coordinates of the edges of the cells. The fdtd package can only do square grids at the moment.
+    half_edge_coords = [numpy.arange(dx/2, m + dx, step=dx) for m in xyz_max]
+    edge_coords = [numpy.hstack((-h[::-1], h)) for h in half_edge_coords]
+
+    # #### Create the grid, mask, and draw the device ####
+    grid = gridlock.Grid(edge_coords, initial=n_air**2, num_grids=3)
+    grid.draw_slab(surface_normal=gridlock.Direction.z,
+                   center=[0, 0, 0],
+                   thickness=th,
+                   eps=n_slab**2)
+    mask = perturbed_l3(a, r)
+    grid.draw_polygons(surface_normal=gridlock.Direction.z,
+                       center=[0, 0, 0],
+                       thickness=2 * th,
+                       eps=n_air**2,
+                       polygons=mask.as_polygons())
+
+    print(grid.shape)
+    # #### Create the simulation grid ####
+    sim = Simulation(grid.grids)
+
+    # Conducting boundaries and pmls in every direction
+    c_args = []
+    pml_args = []
+    for d in (0, 1, 2):
+        for p in (-1, 1):
+            c_args += [{'direction': d, 'polarity': p}]
+            pml_args += [{'direction': d, 'polarity': p, 'epsilon_eff': n_slab**2}]
+    sim.init_conductors(c_args)
+    sim.init_cpml(pml_args)
+
+    # Source parameters and function
+    w = 2 * numpy.pi * dx / wl
+    fwhm = dwl * w * w / (2 * numpy.pi * dx)
+    alpha = (fwhm ** 2) / 8 * numpy.log(2)
+    delay = 7/numpy.sqrt(2 * alpha)
+    field_source = lambda t: numpy.sin(w * (t * sim.dt - delay)) * \
+                             numpy.exp(-alpha * (t * sim.dt - delay)**2)
+
+    # #### Run a bunch of iterations ####
+    # event = sim.whatever([prev_event]) indicates that sim.whatever should be queued immediately and run
+    #  once prev_event is finished.
+    output_file = h5py.File('simulation_output.h5', 'w')
+    start = time.perf_counter()
+    for t in range(max_t):
+        event = sim.cpml_E([])
+        sim.update_E([event]).wait()
+
+        sim.E[1][tuple(grid.shape//2)] += field_source(t)
+        event = sim.conductor_E([])
+        event = sim.cpml_H([event])
+        event = sim.update_H([event])
+        sim.conductor_H([event]).wait()
+
+        print('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start)))
+        sys.stdout.flush()
+
+        # Save field slices
+        if (t % 20 == 0 and (max_t - t < 1000 or t < 1000)) or t == max_t-1:
+            print('saving E-field')
+            for j, f in enumerate(sim.E):
+                a = f.get()
+                output_file['/E{}_t{}'.format('xyz'[j], t)] = a[:, :, round(a.shape[2]/2)]
+
+if __name__ == '__main__':
+    main()
--- a/fdtd/init.py
+++ b/fdtd/init.py
--- a/fdtd/pycache/init.cpython-35.pyc
+++ b/fdtd/pycache/init.cpython-35.pyc
--- a/fdtd/pycache/base.cpython-35.pyc
+++ b/fdtd/pycache/base.cpython-35.pyc
--- a/fdtd/pycache/boundary.cpython-35.pyc
+++ b/fdtd/pycache/boundary.cpython-35.pyc
--- a/fdtd/pycache/simulation.cpython-35.pyc
+++ b/fdtd/pycache/simulation.cpython-35.pyc
--- a/fdtd/base.py
+++ b/fdtd/base.py
@ -0,0 +1,75 @@
+"""
+Basic code snippets for opencl FDTD
+"""
+
+from typing import List
+import numpy
+
+
+def shape_source(shape: List[int] or numpy.ndarray) -> str:
+    """
+    Defines sx, sy, sz C constants specifying the shape of the grid in each of the 3 dimensions.
+
+    :param shape: [sx, sy, sz] values.
+    :return: String containing C source.
+    """
+    sxyz = """
+// Field sizes
+const int sx = {shape[0]};
+const int sy = {shape[1]};
+const int sz = {shape[2]};
+""".format(shape=shape)
+    return sxyz
+
+# Defines dix, diy, diz constants used for stepping in the x, y, z directions in a linear array
+#  (ie, given Ex[i] referring to position (x, y, z), Ex[i+diy] will refer to position (x, y+1, z))
+dixyz_source = """
+// Convert offset in field xyz to linear index offset
+const int dix = sz * sy;
+const int diy = sz;
+const int diz = 1;
+"""
+
+# Given a linear index i and shape sx, sy, sz, defines x, y, and z as the 3D indices of the current element (i).
+xyz_source = """
+// Convert linear index to field index (xyz)
+const int x = i / (sz * sy);
+const int y = (i - x * sz * sy) / sz;
+const int z = (i - y * sz - x * sz * sy);
+"""
+
+# Source code for updating the E field; maxes use of dixyz_source.
+maxwell_E_source = """
+// E update equations
+Ex[i] += dt / epsx[i] * ((Hz[i] - Hz[i-diy]) - (Hy[i] - Hy[i-diz]));
+Ey[i] += dt / epsy[i] * ((Hx[i] - Hx[i-diz]) - (Hz[i] - Hz[i-dix]));
+Ez[i] += dt / epsz[i] * ((Hy[i] - Hy[i-dix]) - (Hx[i] - Hx[i-diy]));
+"""
+
+# Source code for updating the H field; maxes use of dixyz_source and assumes mu=0
+maxwell_H_source = """
+// H update equations
+Hx[i] -= dt * ((Ez[i+diy] - Ez[i]) - (Ey[i+diz] - Ey[i]));
+Hy[i] -= dt * ((Ex[i+diz] - Ex[i]) - (Ez[i+dix] - Ez[i]));
+Hz[i] -= dt * ((Ey[i+dix] - Ey[i]) - (Ex[i+diy] - Ex[i]));
+"""
+
+
+def type_to_C(float_type: numpy.float32 or numpy.float64) -> str:
+    """
+    Returns a string corresponding to the C equivalent of a numpy type.
+    Only works for float32 and float64.
+
+    :param float_type: numpy.float32 or numpy.float64
+    :return: string containing the corresponding C type (eg. 'double')
+    """
+    if float_type == numpy.float32:
+        arg_type = 'float'
+    elif float_type == numpy.float64:
+        arg_type = 'double'
+    else:
+        raise Exception('Unsupported type')
+    return arg_type
+
+
+
--- a/fdtd/boundary.py
+++ b/fdtd/boundary.py
@ -0,0 +1,185 @@
+from typing import List, Dict
+import numpy
+
+
+def conductor(direction: int,
+              polarity: int,
+              ) -> List[str]:
+    """
+    Create source code for conducting boundary conditions.
+
+    :param direction: integer in range(3), corresponding to x,y,z.
+    :param polarity: -1 or 1, specifying eg. a -x or +x boundary.
+    :return: [E_source, H_source] source code for E and H boundary update steps.
+    """
+    if direction not in range(3):
+        raise Exception('Invalid direction: {}'.format(direction))
+
+    if polarity not in (-1, 1):
+        raise Exception('Invalid polarity: {}'.format(polarity))
+
+    r = 'xyz'[direction]
+    uv = 'xyz'.replace(r, '')
+
+    if polarity < 0:
+        bc_E = """
+if ({r} == 0) {{
+    E{r}[i] = 0;
+    E{u}[i] = E{u}[i+di{r}];
+    E{v}[i] = E{v}[i+di{r}];
+}}
+"""
+        bc_H = """
+if ({r} == 0) {{
+    H{r}[i] = H{r}[i+di{r}];
+    H{u}[i] = 0;
+    H{v}[i] = 0;
+}}
+"""
+
+    elif polarity > 0:
+        bc_E = """
+if ({r} == s{r} - 1) {{
+    E{r}[i] = -E{r}[i-2*di{r}];
+    E{u}[i] = +E{u}[i-di{r}];
+    E{v}[i] = +E{v}[i-di{r}];
+}} else if ({r} == s{r} - 2) {{
+    E{r}[i] = 0;
+}}
+"""
+        bc_H = """
+if ({r} == s{r} - 1) {{
+    H{r}[i] = +H{r}[i-di{r}];
+    H{u}[i] = -H{u}[i-2*di{r}];
+    H{v}[i] = -H{v}[i-2*di{r}];
+}} else if ({r} == s{r} - 2) {{
+    H{u}[i] = 0;
+    H{v}[i] = 0;
+}}
+"""
+    else:
+        raise Exception()
+
+    replacements = {'r': r, 'u': uv[0], 'v': uv[1]}
+    return [s.format(**replacements) for s in (bc_E, bc_H)]
+
+
+def cpml(direction: int,
+         polarity: int,
+         dt: float,
+         thickness: int=8,
+         epsilon_eff: float=1,
+         ) -> Dict:
+    """
+    Generate source code for complex phase matched layer (cpml) absorbing boundaries.
+    These are not full boundary conditions and require a conducting boundary to be added
+     in the same direction as the pml.
+
+    :param direction: integer in range(3), corresponding to x, y, z directions.
+    :param polarity: -1 or 1, corresponding to eg. -x or +x direction.
+    :param dt: timestep used by the simulation
+    :param thickness: Number of cells used by the pml (the grid is NOT expanded to add these cells). Default 8.
+    :param epsilon_eff: Effective epsilon_r of the pml layer. Default 1.
+    :return: Dict with entries 'E', 'H' (update equations for E and H) and 'psi_E', 'psi_H' (lists of str,
+            specifying the field names of the cpml fields used in the E and H update steps. Eg.,
+            Psi_xn_Ex for the complex Ex component for the x- pml.)
+    """
+    if direction not in range(3):
+        raise Exception('Invalid direction: {}'.format(direction))
+
+    if polarity not in (-1, 1):
+        raise Exception('Invalid polarity: {}'.format(polarity))
+
+    if thickness <= 2:
+        raise Exception('It would be wise to have a pml with 4+ cells of thickness')
+
+    if epsilon_eff <= 0:
+        raise Exception('epsilon_eff must be positive')
+
+    m = (3.5, 1)
+    sigma_max = 0.8 * (m[0] + 1) / numpy.sqrt(epsilon_eff)
+    alpha_max = 0  # TODO: Decide what to do about non-zero alpha
+    transverse = numpy.delete(range(3), direction)
+
+    r = 'xyz'[direction]
+    np = 'nVp'[numpy.sign(polarity)+1]
+    uv = ['xyz'[i] for i in transverse]
+
+    xE = numpy.arange(1, thickness+1, dtype=float)[::-1]
+    xH = numpy.arange(1, thickness+1, dtype=float)[::-1]
+    if polarity > 0:
+        xE -= 0.5
+    elif polarity < 0:
+        xH -= 0.5
+
+    def par(x):
+        sigma = ((x / thickness) ** m[0]) * sigma_max
+        alpha = ((1 - x / thickness) ** m[1]) * alpha_max
+        p0 = numpy.exp(-(sigma + alpha) * dt)
+        p1 = sigma / (sigma + alpha) * (p0 - 1)
+        return p0, p1
+    p0e, p1e = par(xE)
+    p0h, p1h = par(xH)
+
+    vals = {'r': r,
+            'u': uv[0],
+            'v': uv[1],
+            'np':   np,
+            'th':   thickness,
+            'p0e': ', '.join((str(x) for x in p0e)),
+            'p1e': ', '.join((str(x) for x in p1e)),
+            'p0h': ', '.join((str(x) for x in p0h)),
+            'p1h': ', '.join((str(x) for x in p1h)),
+            'se': '-+'[direction % 2],
+            'sh': '+-'[direction % 2]}
+
+    if polarity < 0:
+        bounds_if = """
+if ( 0 < {r} && {r} < {th} + 1 ) {{
+    const int ir = {r} - 1;                             // index into pml parameters
+    const int ip = {v} + {u} * s{v} + ir * s{v} * s{u};   // linear index into Psi
+"""
+    elif polarity > 0:
+        bounds_if = """
+if ( (s{r} - 1) > {r} && {r} > (s{r} - 1) - ({th} + 1) ) {{
+    const int ir = (s{r} - 1) - ({r} + 1);              // index into pml parameters
+    const int ip = {v} + {u} * s{v} + ir * s{v} * s{u};   // linear index into Psi
+"""
+    else:
+        raise Exception('Bad polarity (=0)')
+
+    code_E = """
+    // pml parameters:
+    const float p0[{th}] = {{ {p0e} }};
+    const float p1[{th}] = {{ {p1e} }};
+
+    Psi_{r}{np}_E{u}[ip] = p0[ir] * Psi_{r}{np}_E{u}[ip] + p1[ir] * (H{v}[i] - H{v}[i-di{r}]);
+    Psi_{r}{np}_E{v}[ip] = p0[ir] * Psi_{r}{np}_E{v}[ip] + p1[ir] * (H{u}[i] - H{u}[i-di{r}]);
+
+    E{u}[i] {se}= dt / eps{u}[i] * Psi_{r}{np}_E{u}[ip];
+    E{v}[i] {sh}= dt / eps{v}[i] * Psi_{r}{np}_E{v}[ip];
+}}
+"""
+    code_H = """
+    // pml parameters:
+    const float p0[{th}] = {{ {p0h} }};
+    const float p1[{th}] = {{ {p1h} }};
+
+    Psi_{r}{np}_H{u}[ip] = p0[ir] * Psi_{r}{np}_H{u}[ip] + p1[ir] * (E{v}[i+di{r}] - E{v}[i]);
+    Psi_{r}{np}_H{v}[ip] = p0[ir] * Psi_{r}{np}_H{v}[ip] + p1[ir] * (E{u}[i+di{r}] - E{u}[i]);
+
+    H{u}[i] {sh}= dt * Psi_{r}{np}_H{u}[ip];
+    H{v}[i] {se}= dt * Psi_{r}{np}_H{v}[ip];
+}}
+"""
+
+    pml_data = {
+        'E': (bounds_if + code_E).format(**vals),
+        'H': (bounds_if + code_H).format(**vals),
+        'psi_E': ['Psi_{r}{np}_E{u}'.format(**vals),
+                  'Psi_{r}{np}_E{v}'.format(**vals)],
+        'psi_H': ['Psi_{r}{np}_H{u}'.format(**vals),
+                  'Psi_{r}{np}_H{v}'.format(**vals)],
+    }
+
+    return pml_data
--- a/fdtd/simulation.py
+++ b/fdtd/simulation.py
@ -0,0 +1,207 @@
+"""
+Class for constructing and holding the basic FDTD operations and fields
+"""
+
+from typing import List, Dict, Callable
+import numpy
+import warnings
+
+import pyopencl
+import pyopencl.array
+from pyopencl.elementwise import ElementwiseKernel
+
+from . import boundary, base
+from .base import type_to_C
+
+
+class Simulation(object):
+    """
+    Constructs and holds the basic FDTD operations and related fields
+    """
+    E = None    # type: List[pyopencl.array.Array]
+    H = None    # type: List[pyopencl.array.Array]
+    eps = None  # type: List[pyopencl.array.Array]
+    dt = None   # type: float
+
+    arg_type = None     # type: numpy.float32 or numpy.float64
+
+    context = None      # type: pyopencl.Context
+    queue = None        # type: pyopencl.CommandQueue
+
+    update_E = None     # type: Callable[[],pyopencl.Event]
+    update_H = None     # type: Callable[[],pyopencl.Event]
+
+    conductor_E = None  # type: Callable[[],pyopencl.Event]
+    conductor_H = None  # type: Callable[[],pyopencl.Event]
+
+    cpml_E = None       # type: Callable[[],pyopencl.Event]
+    cpml_H = None       # type: Callable[[],pyopencl.Event]
+
+    cpml_psi_E = None   # type: Dict[str, pyopencl.array.Array]
+    cpml_psi_H = None   # type: Dict[str, pyopencl.array.Array]
+
+    def __init__(self,
+                 epsilon: List[numpy.ndarray],
+                 dt: float=.99/numpy.sqrt(3),
+                 initial_E: List[numpy.ndarray]=None,
+                 initial_H: List[numpy.ndarray]=None,
+                 context: pyopencl.Context=None,
+                 queue: pyopencl.CommandQueue=None,
+                 float_type: numpy.float32 or numpy.float64=numpy.float32):
+        """
+        Initialize the simulation.
+
+        :param epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray
+                spanning the simulation domain. Relative epsilon is used.
+        :param dt: Time step. Default is the Courant factor.
+        :param initial_E: Initial E-field (default is 0 everywhere). Same format as epsilon.
+        :param initial_H: Initial H-field (default is 0 everywhere). Same format as epsilon.
+        :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
+        :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
+        :param float_type: numpy.float32 or numpy.float64. Default numpy.float32.
+        """
+
+        if len(epsilon) != 3:
+            Exception('Epsilon must be a list with length of 3')
+        if not all((e.shape == epsilon[0].shape for e in epsilon[1:])):
+            Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
+
+        if context is None:
+            self.context = pyopencl.create_some_context(False)
+        else:
+            self.context = context
+
+        if queue is None:
+            self.queue = pyopencl.CommandQueue(self.context)
+        else:
+            self.queue = queue
+
+        if dt > .99/numpy.sqrt(3):
+            warnings.warn('Warning: unstable dt: {}'.format(dt))
+        elif dt <= 0:
+            raise Exception('Invalid dt: {}'.format(dt))
+        else:
+            self.dt = dt
+
+        self.arg_type = float_type
+
+        self.eps = [pyopencl.array.to_device(self.queue, e.astype(float_type)) for e in epsilon]
+
+        if initial_E is None:
+            self.E = [pyopencl.array.zeros_like(self.eps[0]) for _ in range(3)]
+        else:
+            if len(initial_E) != 3:
+                Exception('Initial_E must be a list of length 3')
+            if not all((E.shape == epsilon[0].shape for E in initial_E)):
+                Exception('Initial_E list elements must have same shape as epsilon elements')
+            self.E = [pyopencl.array.to_device(self.queue, E.astype(float_type)) for E in initial_E]
+
+        if initial_H is None:
+            self.H = [pyopencl.array.zeros_like(self.eps[0]) for _ in range(3)]
+        else:
+            if len(initial_H) != 3:
+                Exception('Initial_H must be a list of length 3')
+            if not all((H.shape == epsilon[0].shape for H in initial_H)):
+                Exception('Initial_H list elements must have same shape as epsilon elements')
+            self.H = [pyopencl.array.to_device(self.queue, H.astype(float_type)) for H in initial_H]
+
+        E_args = [type_to_C(self.arg_type) + ' *E' + c for c in 'xyz']
+        H_args = [type_to_C(self.arg_type) + ' *H' + c for c in 'xyz']
+        eps_args = [type_to_C(self.arg_type) + ' *eps' + c for c in 'xyz']
+        dt_arg = [type_to_C(self.arg_type) + ' dt']
+
+        sxyz = base.shape_source(epsilon[0].shape)
+        E_source = sxyz + base.dixyz_source + base.maxwell_E_source
+        H_source = sxyz + base.dixyz_source + base.maxwell_H_source
+
+        E_update = ElementwiseKernel(self.context, operation=E_source,
+                                     arguments=', '.join(E_args + H_args + dt_arg + eps_args))
+
+        H_update = ElementwiseKernel(self.context, operation=H_source,
+                                     arguments=', '.join(E_args + H_args + dt_arg))
+
+        self.update_E = lambda e: E_update(*self.E, *self.H, self.dt, *self.eps, wait_for=e)
+        self.update_H = lambda e: H_update(*self.E, *self.H, self.dt, wait_for=e)
+
+    def init_cpml(self, pml_args: List[Dict]):
+        """
+        Initialize absorbing layers (cpml: complex phase matched layer). PMLs are not actual
+         boundary conditions, so you should add a conducting boundary (.init_conductors()) for
+         all directions in which you add PMLs.
+        Allows use of self.cpml_E(events) and self.cpml_H(events).
+        All necessary additional fields are created on the opencl device.
+
+        :param pml_args: A list containing dictionaries which are passed to .boundary.cpml(...).
+            The dt argument is set automatically, but the others must be passed in each entry
+             of pml_args.
+        """
+        sxyz = base.shape_source(self.eps[0].shape)
+
+        # Prepare per-iteration constants for later use
+        pml_E_source = sxyz + base.dixyz_source + base.xyz_source
+        pml_H_source = sxyz + base.dixyz_source + base.xyz_source
+
+        psi_E = []
+        psi_H = []
+        psi_E_names = []
+        psi_H_names = []
+        for arg_set in pml_args:
+            pml_data = boundary.cpml(dt=self.dt, **arg_set)
+
+            pml_E_source += pml_data['E']
+            pml_H_source += pml_data['H']
+
+            ti = numpy.delete(range(3), arg_set['direction'])
+            trans = [self.eps[0].shape[i] for i in ti]
+            psi_shape = (8, trans[0], trans[1])
+
+            psi_E += [pyopencl.array.zeros(self.queue, psi_shape, dtype=self.arg_type)
+                      for _ in pml_data['psi_E']]
+            psi_H += [pyopencl.array.zeros(self.queue, psi_shape, dtype=self.arg_type)
+                      for _ in pml_data['psi_H']]
+
+            psi_E_names += pml_data['psi_E']
+            psi_H_names += pml_data['psi_H']
+
+        E_args = [type_to_C(self.arg_type) + ' *E' + c for c in 'xyz']
+        H_args = [type_to_C(self.arg_type) + ' *H' + c for c in 'xyz']
+        eps_args = [type_to_C(self.arg_type) + ' *eps' + c for c in 'xyz']
+        dt_arg = [type_to_C(self.arg_type) + ' dt']
+        arglist_E = [type_to_C(self.arg_type) + ' *' + psi for psi in psi_E_names]
+        arglist_H = [type_to_C(self.arg_type) + ' *' + psi for psi in psi_H_names]
+        pml_E_args = ', '.join(E_args + H_args + dt_arg + eps_args + arglist_E)
+        pml_H_args = ', '.join(E_args + H_args + dt_arg + arglist_H)
+
+        pml_E = ElementwiseKernel(self.context, arguments=pml_E_args, operation=pml_E_source)
+        pml_H = ElementwiseKernel(self.context, arguments=pml_H_args, operation=pml_H_source)
+
+        self.cpml_E = lambda e: pml_E(*self.E, *self.H, self.dt, *self.eps, *psi_E, wait_for=e)
+        self.cpml_H = lambda e: pml_H(*self.E, *self.H, self.dt, *psi_H, wait_for=e)
+        self.cmpl_psi_E = {k: v for k, v in zip(psi_E_names, psi_E)}
+        self.cmpl_psi_H = {k: v for k, v in zip(psi_H_names, psi_H)}
+
+    def init_conductors(self, conductor_args: List[Dict]):
+        """
+        Initialize reflecting boundary conditions.
+        Allows use of self.conductor_E(events) and self.conductor_H(events).
+
+        :param conductor_args: List of dictionaries with which to call .boundary.conductor(...).
+        """
+
+        sxyz = base.shape_source(self.eps[0].shape)
+
+        # Prepare per-iteration constants
+        bc_E_source = sxyz + base.dixyz_source + base.xyz_source
+        bc_H_source = sxyz + base.dixyz_source + base.xyz_source
+        for arg_set in conductor_args:
+            [e, h] = boundary.conductor(**arg_set)
+            bc_E_source += e
+            bc_H_source += h
+
+        E_args = [type_to_C(self.arg_type) + ' *E' + c for c in 'xyz']
+        H_args = [type_to_C(self.arg_type) + ' *H' + c for c in 'xyz']
+        bc_E = ElementwiseKernel(self.context, arguments=E_args, operation=bc_E_source)
+        bc_H = ElementwiseKernel(self.context, arguments=H_args, operation=bc_H_source)
+
+        self.conductor_E = lambda e: bc_E(*self.E, wait_for=e)
+        self.conductor_H = lambda e: bc_H(*self.H, wait_for=e)
--- a/pcgen.py
+++ b/pcgen.py
@ -0,0 +1,206 @@
+"""
+Routines for creating normalized 2D lattices and common photonic crystal
+ cavity designs.
+"""
+
+from typing import List
+
+import numpy
+
+
+def triangular_lattice(dims: List[int],
+                       asymmetrical: bool=False
+                       ) -> numpy.ndarray:
+    """
+    Return an ndarray of [[x0, y0], [x1, y1], ...] denoting lattice sites for
+     a triangular lattice in 2D. The lattice will be centered around (0, 0),
+     unless asymmetrical=True in which case there will be extra holes in the +x
+     direction.
+
+    :param dims: Number of lattice sites in the [x, y] directions.
+    :param asymmetrical: If true, each row in x will contain the same number of
+            lattice sites. If false, the structure is symmetrical around (0, 0).
+    :return: [[x0, y0], [x1, 1], ...] denoting lattice sites.
+    """
+    dims = numpy.array(dims, dtype=int)
+
+    if asymmetrical:
+        k = 0
+    else:
+        k = 1
+
+    positions = []
+    for j in numpy.arange(dims[1]):
+        j_odd = j % 2
+        x_offset = (j_odd * 0.5) - dims[0]/2
+        y_offset = dims[1]/2
+        xs = numpy.arange(dims[0] - k * j_odd) + x_offset
+        ys = numpy.full_like(xs, j * numpy.sqrt(3)/2 + y_offset)
+        positions += [numpy.vstack((xs, ys)).T]
+    xy = numpy.vstack(tuple(positions))
+    return xy[xy[:, 0].argsort(), ]
+
+
+def square_lattice(dims: List[int]) -> numpy.ndarray:
+    """
+    Return an ndarray of [[x0, y0], [x1, y1], ...] denoting lattice sites for
+     a square lattice in 2D. The lattice will be centered around (0, 0).
+
+    :param dims: Number of lattice sites in the [x, y] directions.
+    :return: [[x0, y0], [x1, 1], ...] denoting lattice sites.
+    """
+    xs, ys = numpy.meshgrid(range(dims[0]), range(dims[1]), 'xy')
+    xs -= dims[0]/2
+    ys -= dims[1]/2
+    xy = numpy.vstack((xs.flatten(), ys.flatten())).T
+    return xy[xy[:, 0].argsort(), ]
+
+# ### Photonic crystal functions ###
+
+
+def nanobeam_holes(a_defect: float,
+                   num_defect_holes: int,
+                   num_mirror_holes: int
+                   ) -> numpy.ndarray:
+    """
+    Returns a list of [[x0, r0], [x1, r1], ...] of nanobeam hole positions and radii.
+     Creates a region in which the lattice constant and radius are progressively
+     (linearly) altered over num_defect_holes holes until they reach the value
+     specified by a_defect, then symmetrically returned to a lattice constant and
+     radius of 1, which is repeated num_mirror_holes times on each side.
+
+    :param a_defect: Minimum lattice constant for the defect, as a fraction of the
+            mirror lattice constant (ie., for no defect, a_defect = 1).
+    :param num_defect_holes: How many holes form the defect (per-side)
+    :param num_mirror_holes: How many holes form the mirror (per-side)
+    :return: Ndarray [[x0, r0], [x1, r1], ...] of nanobeam hole positions and radii.
+    """
+    a_values = numpy.linspace(a_defect, 1, num_defect_holes, endpoint=False)
+    xs = a_values.cumsum() - (a_values[0] / 2)  # Later mirroring makes center distance 2x as long
+    mirror_xs = numpy.arange(1, num_mirror_holes + 1) + xs[-1]
+    mirror_rs = numpy.ones_like(mirror_xs)
+    return numpy.vstack((numpy.hstack((-mirror_xs[::-1], -xs[::-1], xs, mirror_xs)),
+                         numpy.hstack((mirror_rs[::-1], a_values[::-1], a_values, mirror_rs)))).T
+
+
+def ln_defect(mirror_dims: List[int], defect_length: int) -> numpy.ndarray:
+    """
+    N-hole defect in a triangular lattice.
+
+    :param mirror_dims: [x, y] mirror lengths (number of holes). Total number of holes
+            is 2 * n + 1 in each direction.
+    :param defect_length: Length of defect. Should be an odd number.
+    :return: [[x0, y0], [x1, y1], ...] for all the holes
+    """
+    if defect_length % 2 != 1:
+        raise Exception('defect_length must be odd!')
+    p = triangular_lattice([2 * d + 1 for d in mirror_dims])
+    half_length = numpy.floor(defect_length / 2)
+    hole_nums = numpy.arange(-half_length, half_length + 1)
+    holes_to_keep = numpy.in1d(p[:, 0], hole_nums, invert=True)
+    return p[numpy.logical_or(holes_to_keep, p[:, 1] != 0), ]
+
+
+def ln_shift_defect(mirror_dims: List[int],
+                    defect_length: int,
+                    shifts_a: List[float]=(0.15, 0, 0.075),
+                    shifts_r: List[float]=(1, 1, 1)
+                    ) -> numpy.ndarray:
+    """
+    N-hole defect with shifted holes (intended to give the mode a gaussian profile
+     in real- and k-space so as to improve both Q and confinement). Holes along the
+     defect line are shifted and altered according to the shifts_* parameters.
+
+    :param mirror_dims: [x, y] mirror lengths (number of holes). Total number of holes
+            is 2 * n + 1 in each direction.
+    :param defect_length: Length of defect. Should be an odd number.
+    :param shifts_a: Percentage of a to shift (1st, 2nd, 3rd,...) holes along the defect line
+    :param shifts_r: Factor to multiply the radius by. Should match length of shifts_a
+    :return: [[x0, y0, r0], [x1, y1, r1], ...] for all the holes
+    """
+    if not hasattr(shifts_a, "__len__") and shifts_a is not None:
+        shifts_a = [shifts_a]
+    if not hasattr(shifts_r, "__len__") and shifts_r is not None:
+        shifts_r = [shifts_r]
+
+    xy = ln_defect(mirror_dims, defect_length)
+
+    # Add column for radius
+    xyr = numpy.hstack((xy, numpy.ones((xy.shape[0], 1))))
+
+    # Shift holes
+    # Expand shifts as necessary
+    n_shifted = max(len(shifts_a), len(shifts_r))
+
+    tmp_a = numpy.array(shifts_a)
+    shifts_a = numpy.ones((n_shifted, ))
+    shifts_a[:len(tmp_a)] = tmp_a
+
+    tmp_r = numpy.array(shifts_r)
+    shifts_r = numpy.ones((n_shifted, ))
+    shifts_r[:len(tmp_r)] = tmp_r
+
+    x_removed = numpy.floor(defect_length / 2)
+
+    for ind in range(n_shifted):
+        for sign in (-1, 1):
+            x_val = sign * (x_removed + ind + 1)
+            which = numpy.logical_and(xyr[:, 0] == x_val, xyr[:, 1] == 0)
+            xyr[which, ] = (x_val + numpy.sign(x_val) * shifts_a[ind], 0, shifts_r[ind])
+
+    return xyr
+
+
+def r6_defect(mirror_dims: List[int]) -> numpy.ndarray:
+    """
+    R6 defect in a triangular lattice.
+
+    :param mirror_dims: [x, y] mirror lengths (number of holes). Total number of holes
+            is 2 * n + 1 in each direction.
+    :return: [[x0, y0], [x1, y1], ...] specifying hole centers.
+    """
+    xy = triangular_lattice([2 * d + 1 for d in mirror_dims])
+
+    rem_holes_plus = numpy.array([[1, 0],
+                                  [0.5, +numpy.sqrt(3)/2],
+                                  [0.5, -numpy.sqrt(3)/2]])
+    rem_holes = numpy.vstack((rem_holes_plus, -rem_holes_plus))
+
+    for rem_xy in rem_holes:
+        xy = xy[(xy != rem_xy).any(axis=1), ]
+
+    return xy
+
+
+def l3_shift_perturbed_defect(mirror_dims: List[int],
+                              perturbed_radius: float=1.1,
+                              shifts_a: List[float]=(),
+                              shifts_r: List[float]=()
+                              ) -> numpy.ndarray:
+    """
+    3-hole defect with perturbed hole sizes intended to form an upwards-directed
+     beam. Can also include shifted holes along the defect line, intended
+     to give the mode a more gaussian profile to improve Q.
+
+    :param mirror_dims: [x, y] mirror lengths (number of holes). Total number of holes
+            is 2 * n + 1 in each direction.
+    :param perturbed_radius: Amount to perturb the radius of the holes used for beam-forming
+    :param shifts_a: Percentage of a to shift (1st, 2nd, 3rd,...) holes along the defect line
+    :param shifts_r: Factor to multiply the radius by. Should match length of shifts_a
+    :return: [[x0, y0, r0], [x1, y1, r1], ...] for all the holes
+    """
+    xyr = ln_shift_defect(mirror_dims, 3, shifts_a, shifts_r)
+
+    abs_x, abs_y = (numpy.fabs(xyr[:, i]) for i in (0, 1))
+
+    # Sorted unique xs and ys
+    # Ignore row y=0 because it might have shifted holes
+    xs = numpy.unique(abs_x[abs_x != 0])
+    ys = numpy.unique(abs_y)
+
+    # which holes should be perturbed? (xs[[3, 7]], ys[1]) and (xs[[2, 6]], ys[2])
+    perturbed_holes = ((xs[a], ys[b]) for a, b in ((3, 1), (7, 1), (2, 2), (6, 2)))
+    for row in xyr:
+        if numpy.fabs(row) in perturbed_holes:
+            row[2] = perturbed_radius
+    return xyr
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,6 @@
+numpy
+h5py
+pyopencl
+-e git+https://mpxd.net/gogs/jan/float_raster.git@release#egg=float_raster
+-e git+https://mpxd.net/gogs/jan/gridlock.git@release#egg=gridlock
+-e git+https://mpxd.net/gogs/jan/masque.git@release#egg=masque