Revert "Add restrict keyword to pointers (not sharing the same memory for multiple fields)"

This reverts commit 60b70bb332d29fb5a2aecb801517be659cf4daf9. It appears to have minimal performance impact, and fails to compile on nvidia cards.
style fixes
2019-07-17 00:55:28 -07:00 · 2019-07-15 00:06:43 -07:00 · 2019-07-15 00:06:43 -07:00 · 2019-07-15 00:06:43 -07:00 · 2018-11-30 01:04:00 -08:00 · 2018-11-30 01:03:20 -08:00
15 changed files with 689 additions and 384 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,5 +1,9 @@
 .idea/
-__pycache__
 *.h5
-*.pyc
+
+__pycache__
+*.py[cod]
+build/
+dist/
+*.egg-info/

--- a/README.md
+++ b/README.md
@ -27,14 +27,16 @@ electromagnetic simulations on parallel compute hardware (mainly GPUs).
 * numpy
 * pyopencl
 * jinja2
+* [fdfd_tools](https://mpxd.net/code/jan/fdfd_tools)
+
+Optional (used for examples):
 * dill (for file output)
-* [gridlock](https://mpxd.net/gogs/jan/gridlock)
-* [masque](https://mpxd.net/gogs/jan/masque)
-* [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools)
+* [gridlock](https://mpxd.net/code/jan/gridlock)
+* [masque](https://mpxd.net/code/jan/masque)

 To get the code, just clone this repository:
 ```bash
-git clone https://mpxd.net/gogs/jan/opencl_fdtd.git
+git clone https://mpxd.net/code/jan/opencl_fdtd.git
 ```

 You can install the requirements and their dependencies easily with
--- a/fdtd.py
+++ b/fdtd.py
@ -6,12 +6,13 @@ See main() for simulation setup.

 import sys
 import time
+import logging

 import numpy
-import lzma 
+import lzma
 import dill

-from fdtd.simulation import Simulation
+from opencl_fdtd import Simulation
 from masque import Pattern, shapes
 import gridlock
 import pcgen
@ -20,6 +21,9 @@ import fdfd_tools

 __author__ = 'Jan Petykiewicz'

+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger(__name__)
+

 def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern:
    """
@ -120,9 +124,13 @@ def main():
                       eps=n_air**2,
                       polygons=mask.as_polygons())

-    print('grid shape: {}'.format(grid.shape))
+    logger.info('grid shape: {}'.format(grid.shape))
    # #### Create the simulation grid ####
-    sim = Simulation(grid.grids, do_poynting=False, pmls=[])
+    pmls = [{'axis': a, 'polarity': p, 'thickness': pml_thickness}
+            for a in 'xyz' for p in 'np']
+    #bloch = [{'axis': a, 'real': 1, 'imag': 0} for a in 'x']
+    bloch = []
+    sim = Simulation(grid.grids, do_poynting=True, pmls=pmls, bloch_boundaries=bloch)

    # Source parameters and function
    w = 2 * numpy.pi * dx / wl
@ -133,31 +141,41 @@ def main():
    def field_source(i):
        t0 = i * sim.dt - delay
        return numpy.sin(w * t0) * numpy.exp(-alpha * t0**2)
-   
+
    with open('sources.c', 'w') as f:
        f.write(sim.sources['E'])
-        f.write('\n==========================================\n')
+        f.write('\n====================H======================\n')
        f.write(sim.sources['H'])
        if sim.update_S:
-            f.write('\n==========================================\n')
+            f.write('\n=====================S=====================\n')
            f.write(sim.sources['S'])
+        if bloch:
+            f.write('\n=====================F=====================\n')
+            f.write(sim.sources['F'])
+            f.write('\n=====================G=====================\n')
+            f.write(sim.sources['G'])

    # #### Run a bunch of iterations ####
    # event = sim.whatever([prev_event]) indicates that sim.whatever should be queued
    #  immediately and run once prev_event is finished.
    start = time.perf_counter()
    for t in range(max_t):
-        sim.update_E([]).wait()
+        e = sim.update_E([])
+        if bloch:
+            e = sim.update_F([e])
+        e.wait()

        ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + numpy.prod(grid.shape)
-#        sim.buf[ind] += field_source(t)
+        sim.E[ind] += field_source(t)
        e = sim.update_H([])
+        if bloch:
+            e = sim.update_G([e])
        if sim.update_S:
            e = sim.update_S([e])
        e.wait()

        if t % 100 == 0:
-            print('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start)))
+            logger.info('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start)))
            sys.stdout.flush()

    with lzma.open('saved_simulation', 'wb') as f:
@ -172,5 +190,6 @@ def main():
            d['S'] = unvec(sim.S.get())
        dill.dump(d, f)

+
 if __name__ == '__main__':
    main()
--- a/fdtd/init.py
+++ b/fdtd/init.py
--- a/fdtd/kernels/update_e.cl
+++ b/fdtd/kernels/update_e.cl
@ -1,78 +0,0 @@
-/*
- *  Update E-field, including any PMLs.
- *  
- *  Template parameters:
- *   common_header: Rendered contents of common.cl
- *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
- *   pml_thickness: Number of cells (integer)
- *   
- *  OpenCL args:
- *   E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E]
- */
-
-{{common_header}}
-
-////////////////////////////////////////////////////////////////////////////
-
-__global ftype *epsx = eps + XX;
-__global ftype *epsy = eps + YY;
-__global ftype *epsz = eps + ZZ;
-
-{% if pmls -%}
-const int pml_thickness = {{pml_thickness}};
-{%- endif %}
-
-/*
- *   Precalclate derivatives
- */
-ftype dHxy = Hx[i] - Hx[i + my];
-ftype dHxz = Hx[i] - Hx[i + mz];
-
-ftype dHyx = Hy[i] - Hy[i + mx];
-ftype dHyz = Hy[i] - Hy[i + mz];
-
-ftype dHzx = Hz[i] - Hz[i + mx];
-ftype dHzy = Hz[i] - Hz[i + my];
-
-/*
- *   PML Update
- */
-// PML effects on E
-ftype pExi = 0;
-ftype pEyi = 0;
-ftype pEzi = 0;
-
-{% for r, p in pmls -%}
-    {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
-    {%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%}
-    {%- if r != 'y' -%}
-        {%- set se, sh = '-', '+' -%}
-    {%- else -%}
-        {%- set se, sh = '+', '-' -%}
-    {%- endif -%}
-
-    {%- if p == 'n' %}
-
-if ( {{r}} < pml_thickness ) {
-    const size_t ir = {{r}};                          // index into pml parameters
-
-    {%- elif p == 'p' %}
-
-if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
-    const size_t ir = (s{{r}} - 1) - {{r}};                     // index into pml parameters
-
-    {%- endif %}
-    const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
-    {{psi ~ u}}[ip] = p0e{{p}}[ir] * {{psi ~ u}}[ip] + p1e{{p}}[ir] * dH{{v ~ r}};
-    {{psi ~ v}}[ip] = p0e{{p}}[ir] * {{psi ~ v}}[ip] + p1e{{p}}[ir] * dH{{u ~ r}};
-    pE{{u}}i {{se}}= {{psi ~ u}}[ip];
-    pE{{v}}i {{sh}}= {{psi ~ v}}[ip];
-}
-{%- endfor %}
-
-/*
- *  Update E
- */
-Ex[i] += dt / epsx[i] * (dHzy - dHyz + pExi);
-Ey[i] += dt / epsy[i] * (dHxz - dHzx + pEyi);
-Ez[i] += dt / epsz[i] * (dHyx - dHxy + pEzi);
--- a/fdtd/simulation.py
+++ b/fdtd/simulation.py
@ -1,253 +0,0 @@
-"""
-Class for constructing and holding the basic FDTD operations and fields
-"""
-
-from typing import List, Dict, Callable
-from collections import OrderedDict
-import numpy
-import jinja2
-import warnings
-
-import pyopencl
-import pyopencl.array
-from pyopencl.elementwise import ElementwiseKernel
-
-from fdfd_tools import vec
-
-
-__author__ = 'Jan Petykiewicz'
-
-float4 = pyopencl.array.vec.float4
-
-# Create jinja2 env on module load
-jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
-
-
-class Simulation(object):
-    """
-    Constructs and holds the basic FDTD operations and related fields
-    """
-    E = None    # type: List[pyopencl.array.Array]
-    H = None    # type: List[pyopencl.array.Array]
-    S = None    # type: List[pyopencl.array.Array]
-    eps = None  # type: List[pyopencl.array.Array]
-    dt = None   # type: float
-
-    arg_type = None     # type: numpy.float32 or numpy.float64
-
-    context = None      # type: pyopencl.Context
-    queue = None        # type: pyopencl.CommandQueue
-
-    update_E = None     # type: Callable[[],pyopencl.Event]
-    update_H = None     # type: Callable[[],pyopencl.Event]
-    update_S = None     # type: Callable[[],pyopencl.Event]
-    sources = None      # type: Dict[str, str]
-
-    def __init__(self,
-                 epsilon: List[numpy.ndarray],
-                 dt: float = .99/numpy.sqrt(3),
-                 initial_E: List[numpy.ndarray] = None,
-                 initial_H: List[numpy.ndarray] = None,
-                 context: pyopencl.Context = None,
-                 queue: pyopencl.CommandQueue = None,
-                 float_type: numpy.float32 or numpy.float64 = numpy.float32,
-                 pml_thickness: int = 10,
-                 pmls: List[List[str]] = None,
-                 do_poynting: bool = True):
-        """
-        Initialize the simulation.
-
-        :param epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray
-                spanning the simulation domain. Relative epsilon is used.
-        :param dt: Time step. Default is the Courant factor.
-        :param initial_E: Initial E-field (default is 0 everywhere). Same format as epsilon.
-        :param initial_H: Initial H-field (default is 0 everywhere). Same format as epsilon.
-        :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
-        :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
-        :param float_type: numpy.float32 or numpy.float64. Default numpy.float32.
-        """
-
-        if len(epsilon) != 3:
-            Exception('Epsilon must be a list with length of 3')
-        if not all((e.shape == epsilon[0].shape for e in epsilon[1:])):
-            Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
-
-        if context is None:
-            self.context = pyopencl.create_some_context()
-        else:
-            self.context = context
-
-        if queue is None:
-            self.queue = pyopencl.CommandQueue(self.context)
-        else:
-            self.queue = queue
-
-        if dt > .99/numpy.sqrt(3):
-            warnings.warn('Warning: unstable dt: {}'.format(dt))
-        elif dt <= 0:
-            raise Exception('Invalid dt: {}'.format(dt))
-        else:
-            self.dt = dt
-
-        def make4d(f):
-            g = [numpy.transpose(fi)[:,:,:,None] for fi in f]
-            h = g + [numpy.empty_like(g[0])]
-            j = numpy.concatenate(h, axis=3)
-            return numpy.ascontiguousarray(j, dtype=numpy.float32) 
-
-        self.arg_type = float_type
-        self.sources = {}
-        ef = make4d(epsilon).astype(float_type)
-        self.eps = pyopencl.image_from_array(self.context,
-                make4d(epsilon), num_channels=4, mode='r')
-
-        self.buf = pyopencl.array.Array(self.queue, shape=epsilon.shape, dtype=float4)
-
-        if initial_E is None:
-            self.E = pyopencl.image_from_array(self.context,
-                make4d(epsilon) * 0, num_channels=4, mode='r')
-        else:
-            if len(initial_E) != 3:
-                Exception('Initial_E must be a list of length 3')
-            if not all((E.shape == epsilon[0].shape for E in initial_E)):
-                Exception('Initial_E list elements must have same shape as epsilon elements')
-            self.E = pyopencl.array.to_device(self.queue, vec(E).astype(float_type))
-
-        if initial_H is None:
-            self.H = pyopencl.image_from_array(self.context,
-                make4d(epsilon) * 0, num_channels=4, mode='r')
-        else:
-            if len(initial_H) != 3:
-                Exception('Initial_H must be a list of length 3')
-            if not all((H.shape == epsilon[0].shape for H in initial_H)):
-                Exception('Initial_H list elements must have same shape as epsilon elements')
-            self.H = pyopencl.array.to_device(self.queue, vec(H).astype(float_type))
-
-        if pmls is None:
-            pmls = [[d, p] for d in 'xyz' for p in 'np']
-
-        ctype = type_to_C(self.arg_type)
-
-        def ptr(arg: str) -> str:
-            return ctype + ' *' + arg
-
-        base_fields = OrderedDict()
-
-        common_source = jinja_env.get_template('common.cl').render(
-                shape=epsilon[0].shape,
-                )
-        jinja_args = {
-                'common_header': common_source,
-                'pml_thickness': pml_thickness,
-                'pmls': pmls,
-                'do_poynting': do_poynting,
-                }
-        E_source = jinja_env.get_template('update_e_full.cl').render(**jinja_args)
-        H_source = jinja_env.get_template('update_h_full.cl').render(**jinja_args)
-
-        self.sources['E'] = E_source
-        self.sources['H'] = H_source
-
-        if do_poynting:
-            S_source = jinja_env.get_template('update_s.cl').render(**jinja_args)
-            self.sources['S'] = S_source
-
-            self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=float_type)
-            self.S = pyopencl.array.zeros_like(self.E)
-            S_fields = OrderedDict()
-            S_fields[ptr('oS')] = self.oS
-            S_fields[ptr('S')] = self.S
-        else:
-            S_fields = OrderedDict() 
-
-        '''
-        PML
-        '''
-        m = (3.5, 1)
-        sigma_max = 0.8 * (m[0] + 1) / numpy.sqrt(1.0) # TODO: epsilon_eff (not 1.0)
-        alpha_max = 0  # TODO: Decide what to do about non-zero alpha
-
-        def par(x):
-            sigma = ((x / pml_thickness) ** m[0]) * sigma_max
-            alpha = ((1 - x / pml_thickness) ** m[1]) * alpha_max
-            p0 = numpy.exp(-(sigma + alpha) * dt)
-            p1 = sigma / (sigma + alpha) * (p0 - 1)
-            return p0, p1
-        
-        xen, xep, xhn, xhp = (numpy.arange(1, pml_thickness + 1, dtype=float_type)[::-1] for _ in range(4))
-        xep -= 0.5
-        xhn -= 0.5
-
-        pml_p_names = [['p' + a + eh + np for np in 'np' for a in '01'] for eh in 'eh']
-        pml_e_fields = OrderedDict()
-        pml_h_fields = OrderedDict()
-        for ne, nh, pe, ph in zip(*pml_p_names, par(xen) + par(xep), par(xhn) + par(xhp)):
-            pml_e_fields[ptr(ne)] = pyopencl.array.to_device(self.queue, pe)
-            pml_h_fields[ptr(nh)] = pyopencl.array.to_device(self.queue, ph)
-
-        for pml in pmls:
-            uv = 'xyz'.replace(pml[0], '')
-            psi_base = 'Psi_' + ''.join(pml) + '_'
-            psi_names = [[psi_base + eh + c for c in uv] for eh in 'EH']
-
-            psi_shape = list(epsilon[0].shape)
-            psi_shape['xyz'.find(pml[0])] = pml_thickness
-
-            for ne, nh in zip(*psi_names):
-                pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
-                pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
-        
-        self.pml_e_fields = pml_e_fields
-        self.pml_h_fields = pml_h_fields
-
-
-        '''
-        Create operations
-        '''
-        E_update = pyopencl.Program(self.context, E_source).build().update_e
-        H_update = pyopencl.Program(self.context, H_source).build().update_h
-        max_gs = E_update.get_work_group_info(pyopencl.kernel_work_group_info.WORK_GROUP_SIZE,
-                                      self.queue.device)
-        gs, ls = self.buf.get_sizes(self.queue, max_gs)
-        print('gs', gs, ls, max_gs)
-        def update_E(e):
-            e = pyopencl.enqueue_copy(self.queue, self.buf.data, self.E, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=e)
-            e = E_update(self.queue, gs, ls, self.buf.data, self.H, numpy.float32(dt), self.eps, numpy.uint32(self.buf.size), wait_for=[e])
-            return e
-
-        def update_H(e):
-            e = pyopencl.enqueue_copy(self.queue, self.E, self.buf.data, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=e)
-            e = pyopencl.enqueue_copy(self.queue, self.buf.data, self.H, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=[e])
-            e = H_update(self.queue, gs, ls, self.E, self.buf.data, numpy.float32(dt), numpy.uint32(self.buf.size), wait_for=[e])
-            e = pyopencl.enqueue_copy(self.queue, self.H, self.buf.data, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=[e])
-            return e
-
-        self.update_E = update_E
-        self.update_H = update_H
-
-        if do_poynting:
-            S_args = OrderedDict()
-            [S_args.update(d) for d in (base_fields, S_fields)]
-            S_update = ElementwiseKernel(self.context, operation=S_source,
-                                         arguments=', '.join(S_args.keys()))
-
-            self.update_S = lambda e: S_update(*S_args.values(), wait_for=e)
-
-
-def type_to_C(float_type) -> str:
-    """
-    Returns a string corresponding to the C equivalent of a numpy type.
-    Only works for float16, float32, float64.
-
-    :param float_type: e.g. numpy.float32
-    :return: string containing the corresponding C type (eg. 'double')
-    """
-    if float_type == numpy.float16:
-        arg_type = 'half'
-    elif float_type == numpy.float32:
-        arg_type = 'float'
-    elif float_type == numpy.float64:
-        arg_type = 'double'
-    else:
-        raise Exception('Unsupported type')
-    return arg_type
--- a/opencl_fdtd/init.py
+++ b/opencl_fdtd/init.py
@ -0,0 +1,5 @@
+from .simulation import Simulation, type_to_C
+
+__author__ = 'Jan Petykiewicz'
+
+version = '0.4'
--- a/opencl_fdtd/kernels/common.cl
+++ b/opencl_fdtd/kernels/common.cl
@ -2,10 +2,14 @@
 /* Common code for E, H updates
 *
 * Template parameters:
+ *  ftype       type name (e.g. float or double)
 *  shape       list of 3 ints specifying shape of fields
 */
 #}

+typedef {{ftype}} ftype;
+
+
 /*
 * Field size info
 */
@ -14,6 +18,13 @@ const size_t sy = {{shape[1]}};
 const size_t sz = {{shape[2]}};
 const size_t field_size = sx * sy * sz;

+//Since we use i to index into Ex[], Ey[], ... rather than E[], do nothing if
+// i is outside the bounds of Ex[].
+if (i >= field_size) {
+    PYOPENCL_ELWISE_CONTINUE;
+}
+
+
 /*
 * Array indexing
 */
@ -31,6 +42,25 @@ const size_t diy = sz;
 const size_t diz = 1;


+/*
+ * Pointer math
+ */
+//Pointer offsets into the components of a linearized vector-field
+// (eg. Hx = H + XX, where H and Hx are pointers)
+const size_t XX = 0;
+const size_t YY = field_size;
+const size_t ZZ = field_size * 2;
+
+//Define pointers to vector components of each field (eg. Hx = H + XX)
+__global ftype *Ex = E + XX;
+__global ftype *Ey = E + YY;
+__global ftype *Ez = E + ZZ;
+
+__global ftype *Hx = H + XX;
+__global ftype *Hy = H + YY;
+__global ftype *Hz = H + ZZ;
+
+
 /*
 * Implement periodic boundary conditions
 *
@ -43,12 +73,13 @@ const size_t diz = 1;
 * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix .
 */
 {% for r in 'xyz' %}
-int m{{r}} = -1;
-int p{{r}} = +1;
-int wrap_{{r}} = s{{r}} - 1;
+int m{{r}} = - (int) di{{r}};
+int p{{r}} = + (int) di{{r}};
+int wrap_{{r}} = (s{{r}} - 1) * (int) di{{r}};
 if ( {{r}} == 0 ) {
  m{{r}} = wrap_{{r}};
-} else if ( {{r}} == s{{r}} - 1 ) {
+}
+if ( {{r}} == s{{r}} - 1 ) {
  p{{r}} = -wrap_{{r}};
-} 
+}
 {% endfor %}
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@ -0,0 +1,107 @@
+/*
+ *  Update E-field, including any PMLs.
+ *
+ *  Template parameters:
+ *   common_header: Rendered contents of common.cl
+ *   pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
+ *      axes, polarities, and thicknesses.
+ *   uniform_dx: If grid is uniform, uniform_dx should be the grid spacing.
+ *      Otherwise, uniform_dx should be False and [inv_dh{xyz}] arrays must be supplied as
+ *      OpenCL args.
+ *
+ *  OpenCL args:
+ *   E, H, dt, eps, [p{012}e{np}, Psi_{xyz}{np}_E], [inv_dh{xyz}]
+ */
+
+{{common_header}}
+
+////////////////////////////////////////////////////////////////////////////
+
+__global ftype *epsx = eps + XX;
+__global ftype *epsy = eps + YY;
+__global ftype *epsz = eps + ZZ;
+
+
+{%- if uniform_dx %}
+ftype inv_dx = 1.0 / {{uniform_dx}};
+ftype inv_dy = 1.0 / {{uniform_dx}};
+ftype inv_dz = 1.0 / {{uniform_dx}};
+{%- else %}
+ftype inv_dx = inv_dhx[x];
+ftype inv_dy = inv_dhy[y];
+ftype inv_dz = inv_dhz[z];
+{%- endif %}
+
+
+/*
+ *   Precalculate derivatives
+ */
+ftype dHxy = (Hx[i] - Hx[i + my]) * inv_dy;
+ftype dHxz = (Hx[i] - Hx[i + mz]) * inv_dz;
+
+ftype dHyx = (Hy[i] - Hy[i + mx]) * inv_dx;
+ftype dHyz = (Hy[i] - Hy[i + mz]) * inv_dz;
+
+ftype dHzx = (Hz[i] - Hz[i + mx]) * inv_dx;
+ftype dHzy = (Hz[i] - Hz[i + my]) * inv_dy;
+
+{% for bloch in bloch_boundaries -%}
+    {%- set r = bloch['axis'] -%}
+    {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
+if ({{r}} == 0) {
+    ftype bloch_im = {{bloch['real']}};
+    ftype bloch_re = {{bloch['imag']}};
+    dH{{u ~ r}} = bloch_re * dH{{v ~ r}} + bloch_im * (G{{u}}[i] - G{{u}}[i + m{{u}}]);
+    dH{{v ~ r}} = bloch_re * dH{{v ~ r}} + bloch_im * (G{{v}}[i] - G{{v}}[i + m{{v}}]);
+}
+{%- endfor %}
+
+
+/*
+ *   PML Update
+ */
+// PML effects on E
+ftype pExi = 0;
+ftype pEyi = 0;
+ftype pEzi = 0;
+
+{% for pml in pmls -%}
+    {%- set r = pml['axis'] -%}
+    {%- set p = pml['polarity'] -%}
+    {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
+    {%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%}
+    {%- if r != 'y' -%}
+        {%- set se, sh = '-', '+' -%}
+    {%- else -%}
+        {%- set se, sh = '+', '-' -%}
+    {%- endif -%}
+
+int pml_{{r ~ p}}_thickness = {{pml['thickness']}};
+
+    {%- if p == 'n' %}
+
+if ( {{r}} < pml_{{r ~ p}}_thickness ) {
+    const size_t ir = {{r}};                          // index into pml parameters
+
+    {%- elif p == 'p' %}
+
+if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
+    const size_t ir = (s{{r}} - 1) - {{r}};                     // index into pml parameters
+
+    {%- endif %}
+    const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
+    dH{{v ~ r}} *= p{{r}}2e{{p}}[ir];
+    dH{{u ~ r}} *= p{{r}}2e{{p}}[ir];
+    {{psi ~ u}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1e{{p}}[ir] * dH{{v ~ r}};
+    {{psi ~ v}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1e{{p}}[ir] * dH{{u ~ r}};
+    pE{{u}}i {{se}}= {{psi ~ u}}[ip];
+    pE{{v}}i {{sh}}= {{psi ~ v}}[ip];
+}
+{%- endfor %}
+
+/*
+ *  Update E
+ */
+Ex[i] += dt / epsx[i] * (dHzy - dHyz + pExi);
+Ey[i] += dt / epsy[i] * (dHxz - dHzx + pEyi);
+Ez[i] += dt / epsz[i] * (dHyx - dHxy + pEzi);
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@ -1,36 +1,58 @@
 /*
 *  Update H-field, including any PMLs.
 *   Also precalculate values for poynting vector if necessary.
- *  
+ *
 *  Template parameters:
 *   common_header: Rendered contents of common.cl
- *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
- *   pml_thickness: Number of cells (integer)
- *   do_poynting: Whether to precalculate poynting vector components (boolean)  
+ *   pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
+ *      axes, polarities, and thicknesses.
+ *   do_poynting: Whether to precalculate poynting vector components (boolean)
+ *   uniform_dx: If grid is uniform, uniform_dx should be the grid spacing.
+ *      Otherwise, uniform_dx should be False and [inv_de{xyz}] arrays must be supplied as
+ *      OpenCL args.
 *
 *  OpenCL args:
- *   E, H, dt, [p{01}h{np}, Psi_{xyz}{np}_H], [oS]
+ *   E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS]
 */

 {{common_header}}

 ////////////////////////////////////////////////////////////////////////////

-{% if pmls -%}
-const int pml_thickness = {{pml_thickness}};
-{%- endif %}
-
 /*
 *  Precalculate derivatives
 */
-ftype dExy = Ex[i + py] - Ex[i];
-ftype dExz = Ex[i + pz] - Ex[i];
+{%- if uniform_dx %}
+ftype inv_dx = 1.0 / {{uniform_dx}};
+ftype inv_dy = 1.0 / {{uniform_dx}};
+ftype inv_dz = 1.0 / {{uniform_dx}};
+{%- else %}
+ftype inv_dx = inv_dex[x];
+ftype inv_dy = inv_dey[y];
+ftype inv_dz = inv_dez[z];
+{%- endif %}

-ftype dEyx = Ey[i + px] - Ey[i];
-ftype dEyz = Ey[i + pz] - Ey[i];

-ftype dEzx = Ez[i + px] - Ez[i];
-ftype dEzy = Ez[i + py] - Ez[i];
+ftype dExy = (Ex[i + py] - Ex[i]) * inv_dy;
+ftype dExz = (Ex[i + pz] - Ex[i]) * inv_dz;
+
+ftype dEyx = (Ey[i + px] - Ey[i]) * inv_dx;
+ftype dEyz = (Ey[i + pz] - Ey[i]) * inv_dz;
+
+ftype dEzx = (Ez[i + px] - Ez[i]) * inv_dx;
+ftype dEzy = (Ez[i + py] - Ez[i]) * inv_dy;
+
+
+{% for bloch in bloch_boundaries -%}
+    {%- set r = bloch['axis'] -%}
+    {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
+if ({{r}} == s{{r}} - 1) {
+    ftype bloch_re = {{bloch['real']}};
+    ftype bloch_im = {{bloch['imag']}};
+    dE{{u ~ r}} = bloch_re * dE{{u ~ r}} + bloch_im * (F{{u}}[i + p{{u}}] - F{{u}}[i]);
+    dE{{v ~ r}} = bloch_re * dE{{v ~ r}} + bloch_im * (F{{v}}[i + p{{v}}] - F{{v}}[i]);
+}
+{% endfor -%}

 {%- if do_poynting %}

@ -49,6 +71,8 @@ ftype aEzy = Ez[i + py] + Ez[i];
 {%- endif %}


+
+
 /*
 *  PML Update
 */
@ -57,29 +81,35 @@ ftype pHxi = 0;
 ftype pHyi = 0;
 ftype pHzi = 0;

-{%- for r, p in pmls -%}
+{% for pml in pmls -%}
+    {%- set r = pml['axis'] -%}
+    {%- set p = pml['polarity'] -%}
    {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
    {%- set psi = 'Psi_' ~ r ~ p ~ '_H' -%}
    {%- if r != 'y' -%}
        {%- set se, sh = '-', '+' -%}
    {%- else -%}
        {%- set se, sh = '+', '-' -%}
-    {%- endif -%}
+    {%- endif %}
+
+int pml_{{r ~ p}}_thickness = {{pml['thickness']}};

    {%- if p == 'n' %}

-if ( {{r}} < pml_thickness ) {
+if ( {{r}} < pml_{{r ~ p}}_thickness ) {
    const size_t ir = {{r}};                          // index into pml parameters

    {%- elif p == 'p' %}

-if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
+if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
    const size_t ir = (s{{r}} - 1) - {{r}};                     // index into pml parameters

    {%- endif %}
    const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
-    {{psi ~ u}}[ip] = p0h{{p}}[ir] * {{psi ~ u}}[ip] + p1h{{p}}[ir] * dE{{v ~ r}};
-    {{psi ~ v}}[ip] = p0h{{p}}[ir] * {{psi ~ v}}[ip] + p1h{{p}}[ir] * dE{{u ~ r}};
+    dE{{v ~ r}} *= p{{r}}2h{{p}}[ir];
+    dE{{u ~ r}} *= p{{r}}2h{{p}}[ir];
+    {{psi ~ u}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1h{{p}}[ir] * dE{{v ~ r}};
+    {{psi ~ v}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1h{{p}}[ir] * dE{{u ~ r}};
    pH{{u}}i {{sh}}= {{psi ~ u}}[ip];
    pH{{v}}i {{se}}= {{psi ~ v}}[ip];
 }
@ -96,9 +126,9 @@ ftype Hz_old = Hz[i];
 {%- endif %}

 // H update equations
-Hx[i] -= dt * (dEzy - dEyz + pHxi);
-Hy[i] -= dt * (dExz - dEzx + pHyi);
-Hz[i] -= dt * (dEyx - dExy + pHzi);
+Hx[i] -= dt * (dEzy - dEyz - pHxi);
+Hy[i] -= dt * (dExz - dEzx - pHyi);
+Hz[i] -= dt * (dEyx - dExy - pHzi);

 {% if do_poynting -%}
 // Average H across timesteps
--- a/opencl_fdtd/kernels/update_j.cl
+++ b/opencl_fdtd/kernels/update_j.cl
@ -0,0 +1,32 @@
+/*
+ *  Update E-field from J field
+ *
+ *  Template parameters:
+ *   common_header: Rendered contents of common.cl
+ *
+ *  OpenCL args:
+ *   E, Jr, Ji, c, s, xmin, xmax, ymin, ymax, zmin, zmax
+ */
+
+{{common_header}}
+
+////////////////////////////////////////////////////////////////////////////
+
+__global ftype *Jrx = Jr + XX;
+__global ftype *Jry = Jr + YY;
+__global ftype *Jrz = Jr + ZZ;
+__global ftype *Jix = Ji + XX;
+__global ftype *Jiy = Ji + YY;
+__global ftype *Jiz = Ji + ZZ;
+
+
+if (x < xmin || y < ymin || z < zmin) {
+   PYOPENCL_ELWISE_CONTINUE;
+}
+if (x >= xmax || y >= ymax || z >= zmax) {
+   PYOPENCL_ELWISE_CONTINUE;
+}
+
+Ex[i] += c * Jrx[i] + s * Jix[i];
+Ey[i] += c * Jry[i] + s * Jiy[i];
+Ez[i] += c * Jrz[i] + s * Jiz[i];
--- a/opencl_fdtd/kernels/update_s.cl
+++ b/opencl_fdtd/kernels/update_s.cl
@ -1,11 +1,11 @@
 /*
 *  Update E-field, including any PMLs.
- *  
+ *
 *  Template parameters:
 *   common_header: Rendered contents of common.cl
 *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
 *   pml_thickness: Number of cells (integer)
- *   
+ *
 *  OpenCL args:
 *   E, H, dt, S, oS
 */
@ -17,12 +17,12 @@

 /*
 * Calculate S from oS (pre-calculated components)
- */ 
+ */
 __global ftype *Sx = S + XX;
 __global ftype *Sy = S + YY;
 __global ftype *Sz = S + ZZ;

-// Use unscaled S components from H locations 
+// Use unscaled S components from H locations
 __global ftype *oSxy = oS + 0 * field_size;
 __global ftype *oSyz = oS + 1 * field_size;
 __global ftype *oSzx = oS + 2 * field_size;
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@ -0,0 +1,376 @@
+"""
+Class for constructing and holding the basic FDTD operations and fields
+"""
+
+from typing import List, Dict, Callable
+from collections import OrderedDict
+import numpy
+import jinja2
+import warnings
+
+import pyopencl
+import pyopencl.array
+from pyopencl.elementwise import ElementwiseKernel
+
+from fdfd_tools import vec
+
+
+__author__ = 'Jan Petykiewicz'
+
+
+# Create jinja2 env on module load
+jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
+
+
+class Simulation(object):
+    """
+    Constructs and holds the basic FDTD operations and related fields
+
+    After constructing this object, call the (update_E, update_H, update_S) members
+     to perform FDTD updates on the stored (E, H, S) fields:
+
+        pmls = [{'axis': a, 'polarity': p} for a in 'xyz' for p in 'np']
+        sim = Simulation(grid.grids, do_poynting=True, pmls=pmls)
+        with open('sources.c', 'w') as f:
+            f.write('{}'.format(sim.sources))
+
+        for t in range(max_t):
+            sim.update_E([]).wait()
+
+            # Find the linear index for the center point, for Ey
+            ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + \
+                    numpy.prod(grid.shape) * 1
+            # Perturb the field (i.e., add a soft current source)
+            sim.E[ind] += numpy.sin(omega * t * sim.dt)
+            event = sim.update_H([])
+            if sim.update_S:
+                event = sim.update_S([event])
+            event.wait()
+
+            with lzma.open('saved_simulation', 'wb') as f:
+                dill.dump(fdfd_tools.unvec(sim.E.get(), grid.shape), f)
+
+    Code in the form
+        event2 = sim.update_H([event0, event1])
+     indicates that the update_H operation should be prepared immediately, but wait for
+     event0 and event1 to occur (i.e. previous operations to finish) before starting execution.
+     event2 can then be used to prepare further operations to be run after update_H.
+    """
+    E = None            # type: pyopencl.array.Array
+    H = None            # type: pyopencl.array.Array
+    S = None            # type: pyopencl.array.Array
+    eps = None          # type: pyopencl.array.Array
+    dt = None           # type: float
+    inv_dxes = None     # type: List[pyopencl.array.Array]
+
+    arg_type = None     # type: numpy.float32 or numpy.float64
+
+    context = None      # type: pyopencl.Context
+    queue = None        # type: pyopencl.CommandQueue
+
+    update_E = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
+    update_H = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
+    update_S = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
+    update_J = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
+    sources = None      # type: Dict[str, str]
+
+    def __init__(self,
+                 epsilon: List[numpy.ndarray],
+                 pmls: List[Dict[str, int or float]],
+                 bloch_boundaries: List[Dict[str, int or float]] = (),
+                 dxes: List[List[numpy.ndarray]] or float = None,
+                 dt: float = None,
+                 initial_fields: Dict[str, List[numpy.ndarray]] = None,
+                 context: pyopencl.Context = None,
+                 queue: pyopencl.CommandQueue = None,
+                 float_type: numpy.float32 or numpy.float64 = numpy.float32,
+                 do_poynting: bool = True,
+                 do_fieldsrc: bool = False):
+        """
+        Initialize the simulation.
+
+        :param epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray
+                spanning the simulation domain. Relative epsilon is used.
+        :param pmls: List of dicts with keys:
+            'axis': One of 'x', 'y', 'z'.
+            'direction': One of 'n', 'p'.
+            'thickness': Number of layers, default 8.
+            'epsilon_eff': Effective epsilon to match to. Default 1.0.
+            'mu_eff': Effective mu to match to. Default 1.0.
+            'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6.
+            'm': Polynomial grading exponent. Default 3.5.
+            'ma': Exponent for alpha. Default 1.
+        :param bloch_boundaries: List of dicts with keys:
+            'axis': One of 'x', 'y', 'z'.
+            'real': Real part of bloch phase factor (i.e. real(exp(i * phase)))
+            'imag': Imaginary part of bloch phase factor (i.e. imag(exp(i * phase)))
+        :param dt: Time step. Default is min(dxes) * .99/sqrt(3).
+        :param initial_fields: Dict with optional keys ('E', 'H', 'F', 'G') containing initial values for the
+            specified fields (default is 0 everywhere). Fields have same format as epsilon.
+        :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
+        :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
+        :param float_type: numpy.float32 or numpy.float64. Default numpy.float32.
+        :param do_poynting: If true, enables calculation of the poynting vector, S.
+                Poynting vector calculation adds the following computational burdens:
+                    * During update_H, ~6 extra additions/cell are performed in order to spatially
+                        average E and temporally average H. These quantities are multiplied
+                        (6 multiplications/cell) and then stored (6 writes/cell, cache-friendly).
+                    * update_S performs a discrete cross product using the precalculated products
+                        from update_H. This is not nice to the cache and similar to e.g. update_E
+                        in complexity.
+                    * GPU memory requirements are approximately doubled, since S and the intermediate
+                        products must be stored.
+        """
+        if initial_fields is None:
+            initial_fields = {}
+
+        self.shape = epsilon[0].shape
+        self.arg_type = float_type
+        self.sources = {}
+        self._create_context(context, queue)
+        self._create_eps(epsilon)
+
+        if dxes is None:
+            dxes = 1.0
+
+        if isinstance(dxes, (float, int)):
+            uniform_dx = dxes
+            min_dx = dxes
+        else:
+            uniform_dx = False
+            self.inv_dxes = [self._create_field(1 / dxn) for dxn in dxes[0] + dxes[1]]
+            min_dx = min(min(dxn) for dxn in dxes[0] + dxes[1])
+
+        max_dt = min_dx * .99 / numpy.sqrt(3)
+
+        if dt is None:
+            self.dt = max_dt
+        elif dt > max_dt:
+            warnings.warn('Warning: unstable dt: {}'.format(dt))
+        elif dt <= 0:
+            raise Exception('Invalid dt: {}'.format(dt))
+        else:
+            self.dt = dt
+
+        self.E = self._create_field(initial_fields.get('E', None))
+        self.H = self._create_field(initial_fields.get('H', None))
+        if bloch_boundaries:
+            self.F = self._create_field(initial_fields.get('F', None))
+            self.G = self._create_field(initial_fields.get('G', None))
+
+        for pml in pmls:
+            pml.setdefault('thickness', 8)
+            pml.setdefault('epsilon_eff', 1.0)
+            pml.setdefault('mu_eff', 1.0)
+            pml.setdefault('ln_R_per_layer', -1.6)
+            pml.setdefault('m', 3.5)
+            pml.setdefault('cfs_alpha', 0)
+            pml.setdefault('ma', 1)
+
+        ctype = type_to_C(self.arg_type)
+
+        def ptr(arg: str) -> str:
+            return ctype + ' *' + arg
+
+        base_fields = OrderedDict()
+        base_fields[ptr('E')] = self.E
+        base_fields[ptr('H')] = self.H
+        base_fields[ctype + ' dt'] = self.dt
+        if uniform_dx == False:
+            inv_dx_names = ['inv_d' + eh + r for eh in 'eh' for r in 'xyz']
+            for name, field in zip(inv_dx_names, self.inv_dxes):
+                base_fields[ptr(name)] = field
+
+        eps_field = OrderedDict()
+        eps_field[ptr('eps')] = self.eps
+
+        if bloch_boundaries:
+            base_fields[ptr('F')] = self.F
+            base_fields[ptr('G')] = self.G
+
+            bloch_fields = OrderedDict()
+            bloch_fields[ptr('E')] = self.F
+            bloch_fields[ptr('H')] = self.G
+            bloch_fields[ctype + ' dt'] = self.dt
+            bloch_fields[ptr('F')] = self.E
+            bloch_fields[ptr('G')] = self.H
+
+        common_source = jinja_env.get_template('common.cl').render(
+                ftype=ctype,
+                shape=self.shape,
+                )
+        jinja_args = {
+                'common_header': common_source,
+                'pmls': pmls,
+                'do_poynting': do_poynting,
+                'bloch': bloch_boundaries,
+                'uniform_dx': uniform_dx,
+                }
+        E_source = jinja_env.get_template('update_e.cl').render(**jinja_args)
+        H_source = jinja_env.get_template('update_h.cl').render(**jinja_args)
+        self.sources['E'] = E_source
+        self.sources['H'] = H_source
+
+        if bloch_boundaries:
+            bloch_args = jinja_args.copy()
+            bloch_args['do_poynting'] = False
+            bloch_args['bloch'] = [{'axis': b['axis'],
+                                    'real': b['imag'],
+                                    'imag': b['real']}
+                                   for b in bloch_boundaries]
+            F_source = jinja_env.get_template('update_e.cl').render(**bloch_args)
+            G_source = jinja_env.get_template('update_h.cl').render(**bloch_args)
+            self.sources['F'] = F_source
+            self.sources['G'] = G_source
+
+
+        S_fields = OrderedDict()
+        if do_poynting:
+            S_source = jinja_env.get_template('update_s.cl').render(**jinja_args)
+            self.sources['S'] = S_source
+
+            self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=self.arg_type)
+            self.S = pyopencl.array.zeros_like(self.E)
+            S_fields[ptr('oS')] = self.oS
+            S_fields[ptr('S')] = self.S
+
+        J_fields = OrderedDict()
+        if do_fieldsrc:
+            J_source = jinja_env.get_template('update_j.cl').render(**jinja_args)
+            self.sources['J'] = J_source
+
+            self.Ji = pyopencl.array.zeros_like(self.E)
+            self.Jr = pyopencl.array.zeros_like(self.E)
+            J_fields[ptr('Jr')] = self.Jr
+            J_fields[ptr('Ji')] = self.Ji
+
+
+        '''
+        PML
+        '''
+        pml_e_fields, pml_h_fields = self._create_pmls(pmls)
+        if bloch_boundaries:
+            pml_f_fields, pml_g_fields = self._create_pmls(pmls)
+
+        '''
+        Create operations
+        '''
+        self.update_E = self._create_operation(E_source, (base_fields, eps_field, pml_e_fields))
+        self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields))
+        if do_poynting:
+            self.update_S = self._create_operation(S_source, (base_fields, S_fields))
+        if bloch_boundaries:
+            self.update_F = self._create_operation(F_source, (bloch_fields, eps_field, pml_f_fields))
+            self.update_G = self._create_operation(G_source, (bloch_fields, pml_g_fields))
+        if do_fieldsrc:
+            args = OrderedDict()
+            [args.update(d) for d in (base_fields, J_fields)]
+            var_args = [ctype + ' ' + v for v in 'cs'] + ['uint ' + r + m for r in 'xyz' for m in ('min', 'max')]
+            update = ElementwiseKernel(self.context, operation=J_source,
+                                       arguments=', '.join(list(args.keys()) + var_args))
+            self.update_J = lambda e, *a: update(*args.values(), *a, wait_for=e)
+
+
+    def _create_pmls(self, pmls):
+        ctype = type_to_C(self.arg_type)
+        def ptr(arg: str) -> str:
+            return ctype + ' *' + arg
+
+        pml_e_fields = OrderedDict()
+        pml_h_fields = OrderedDict()
+        for pml in pmls:
+            a = 'xyz'.find(pml['axis'])
+
+            sigma_max = -pml['ln_R_per_layer'] / 2 * (pml['m'] + 1)
+            kappa_max = numpy.sqrt(pml['mu_eff'] * pml['epsilon_eff'])
+            alpha_max = pml['cfs_alpha']
+
+            def par(x):
+                scaling = (x / pml['thickness']) ** pml['m']
+                sigma = scaling * sigma_max
+                kappa = 1 + scaling * (kappa_max - 1)
+                alpha = ((1 - x / pml['thickness']) ** pml['ma']) * alpha_max
+                p0 = numpy.exp(-(sigma / kappa + alpha) * self.dt)
+                p1 = sigma / (sigma + kappa * alpha) * (p0 - 1)
+                p2 = 1 / kappa
+                return p0, p1, p2
+
+            xe, xh = (numpy.arange(1, pml['thickness'] + 1, dtype=self.arg_type)[::-1] for _ in range(2))
+            if pml['polarity'] == 'p':
+                xe -= 0.5
+            elif pml['polarity'] == 'n':
+                xh -= 0.5
+
+            pml_p_names = [['p' + pml['axis'] + i + eh + pml['polarity'] for i in '012'] for eh in 'eh']
+            for name_e, name_h, pe, ph in zip(pml_p_names[0], pml_p_names[1], par(xe), par(xh)):
+                pml_e_fields[ptr(name_e)] = pyopencl.array.to_device(self.queue, pe)
+                pml_h_fields[ptr(name_h)] = pyopencl.array.to_device(self.queue, ph)
+
+            uv = 'xyz'.replace(pml['axis'], '')
+            psi_base = 'Psi_' + pml['axis'] + pml['polarity'] + '_'
+            psi_names = [[psi_base + eh + c for c in uv] for eh in 'EH']
+
+            psi_shape = list(self.shape)
+            psi_shape[a] = pml['thickness']
+
+            for ne, nh in zip(*psi_names):
+                pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
+                pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
+        return pml_e_fields, pml_h_fields
+
+    def _create_operation(self, source, args_fields):
+        args = OrderedDict()
+        [args.update(d) for d in args_fields]
+        update = ElementwiseKernel(self.context, operation=source,
+                                   arguments=', '.join(args.keys()))
+        return lambda e: update(*args.values(), wait_for=e)
+
+    def _create_context(self, context: pyopencl.Context = None,
+                        queue: pyopencl.CommandQueue = None):
+        if context is None:
+            self.context = pyopencl.create_some_context()
+        else:
+            self.context = context
+
+        if queue is None:
+            self.queue = pyopencl.CommandQueue(self.context)
+        else:
+            self.queue = queue
+
+    def _create_eps(self, epsilon: List[numpy.ndarray]):
+        if len(epsilon) != 3:
+            raise Exception('Epsilon must be a list with length of 3')
+        if not all((e.shape == epsilon[0].shape for e in epsilon[1:])):
+            raise Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
+        if not epsilon[0].shape == self.shape:
+            raise Exception('Epsilon shape mismatch. Expected {}, got {}'.format(self.shape, epsilon[0].shape))
+        self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(self.arg_type))
+
+    def _create_field(self, initial_value: List[numpy.ndarray] = None):
+        if initial_value is None:
+            return pyopencl.array.zeros_like(self.eps)
+        else:
+            if len(initial_value) != 3:
+                Exception('Initial field value must be a list of length 3')
+            if not all((f.shape == self.shape for f in initial_value)):
+                Exception('Initial field list elements must have same shape as epsilon elements')
+            return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type))
+
+
+def type_to_C(float_type) -> str:
+    """
+    Returns a string corresponding to the C equivalent of a numpy type.
+    Only works for float16, float32, float64.
+
+    :param float_type: e.g. numpy.float32
+    :return: string containing the corresponding C type (eg. 'double')
+    """
+    if float_type == numpy.float16:
+        arg_type = 'half'
+    elif float_type == numpy.float32:
+        arg_type = 'float'
+    elif float_type == numpy.float64:
+        arg_type = 'double'
+    else:
+        raise Exception('Unsupported type')
+    return arg_type
--- a/requirements.txt
+++ b/requirements.txt
@ -2,7 +2,7 @@ numpy
 h5py
 pyopencl
 jinja2
-e git+https://mpxd.net/gogs/jan/float_raster.git@release#egg=float_raster
-e git+https://mpxd.net/gogs/jan/gridlock.git@release#egg=gridlock
-e git+https://mpxd.net/gogs/jan/masque.git@release#egg=masque
-e git+https://mpxd.net/gogs/jan/fdfd_tools.git@master#egg=fdfd_tools
+-e git+https://mpxd.net/code/jan/float_raster.git@release#egg=float_raster
+-e git+https://mpxd.net/code/jan/gridlock.git@release#egg=gridlock
+-e git+https://mpxd.net/code/jan/masque.git@release#egg=masque
+-e git+https://mpxd.net/code/jan/fdfd_tools.git@master#egg=fdfd_tools
--- a/setup.py
+++ b/setup.py
@ -0,0 +1,30 @@
+#!/usr/bin/env python3
+
+from setuptools import setup, find_packages
+import opencl_fdtd
+
+with open('README.md', 'r') as f:
+    long_description = f.read()
+
+setup(name='opencl_fdtd',
+      version=opencl_fdtd.version,
+      description='OpenCL FDTD solver',
+      long_description=long_description,
+      long_description_content_type='text/markdown',
+      author='Jan Petykiewicz',
+      author_email='anewusername@gmail.com',
+      url='https://mpxd.net/code/jan/opencl_fdtd',
+      packages=find_packages(),
+      package_data={
+          'opencl_fdfd': ['kernels/*']
+      },
+      install_requires=[
+            'numpy',
+            'pyopencl',
+            'jinja2',
+            'fdfd_tools>=0.3',
+      ],
+      extras_require={
+      },
+      )
+
Author	SHA1	Message	Date
Jan Petykiewicz	d5fd78d493	Revert "Add restrict keyword to pointers (not sharing the same memory for multiple fields)" This reverts commit 60b70bb332d29fb5a2aecb801517be659cf4daf9. It appears to have minimal performance impact, and fails to compile on nvidia cards.	2019-07-17 00:55:28 -07:00
Jan Petykiewicz	fe2f4f4510	style fixes	2019-07-15 00:06:43 -07:00
Jan Petykiewicz	314e36a3cc	fix for 1-sized grids	2019-07-15 00:06:43 -07:00
Jan Petykiewicz	a37df3a74f	expand gitignores	2019-07-15 00:06:43 -07:00
jan	582dafbc2f	Update example with bloch fields	2018-11-30 01:04:00 -08:00
jan	385e2859a9	Update parameter descriptions	2018-11-30 01:03:20 -08:00
jan	0f0f16e184	Minor formatting changes	2018-11-30 01:03:10 -08:00
Jan Petykiewicz	a76e741d32	Minor spacing changes	2018-11-30 00:37:16 -08:00
Jan Petykiewicz	60b70bb332	Add restrict keyword to pointers (not sharing the same memory for multiple fields)	2018-11-30 00:37:07 -08:00
Jan Petykiewicz	9cf50b1d88	Enable nonzero pml alpha	2018-11-30 00:36:32 -08:00
Jan Petykiewicz	65cc118392	Remove debug code	2018-11-30 00:36:22 -08:00
Jan Petykiewicz	f5a5a0ad04	Enable nonuniform grids (minimally tested)	2018-11-30 00:36:11 -08:00
Jan Petykiewicz	5ffe547640	Typo fixes and minor comment updates	2018-11-30 00:13:27 -08:00
Jan Petykiewicz	3ac20f6271	Add bloch boundaries (untested)	2018-11-29 02:01:48 -08:00
Jan Petykiewicz	d8dc024626	Implementation of "source" fields (J)	2018-11-29 02:01:28 -08:00
Jan Petykiewicz	cb471df182	Implement proper kappa for PML	2018-11-29 02:00:30 -08:00
Jan Petykiewicz	f00c8b4a3e	Add _create_context(), _create_operation(), and _create_pmls(), and generalize initial field value args	2018-11-29 01:59:05 -08:00
Jan Petykiewicz	2b1d906665	Add _create_field() and _create_eps()	2018-11-29 01:38:28 -08:00
Jan Petykiewicz	1e874cb0c0	Fix sign on H component of PML	2018-11-29 01:36:13 -08:00
Jan Petykiewicz	ea5e298023	Explicitly cast to int	2018-11-29 01:35:20 -08:00
Jan Petykiewicz	1de6fb0e39	Use readme as long_description	2018-09-16 20:12:52 -07:00
Jan Petykiewicz	d0b303523e	Move version string into module	2018-09-16 20:12:41 -07:00
Jan Petykiewicz	c8298d916f	Use python3 for setup.py	2018-09-16 20:12:26 -07:00
Jan Petykiewicz	aab57412a5	move code to new location	2018-01-15 22:36:40 -08:00
jan	a67009d1c8	fix declaration	2017-10-06 14:20:10 -07:00
jan	6a03977a69	fix pml param names	2017-10-06 14:12:51 -07:00
jan	9772f47a34	fix typo	2017-10-06 14:01:22 -07:00
jan	6bd756c3d3	add setup.py	2017-10-06 13:50:57 -07:00
jan	d02cd18403	improve pml specification	2017-10-06 13:49:46 -07:00
jan	c137da15b9	Merge branch 'master' of mpxd.net:jan/opencl_fdtd	2017-08-24 11:28:14 -07:00
jan	a85f547749	doc updates	2017-08-24 11:28:03 -07:00
jan	0d91f0d43e	rename lib	2017-08-14 15:41:20 -07:00
Jan Petykiewicz	97d9901e4b	Use logging package for output	2017-08-12 19:20:29 -07:00