try out opencl image3d_t... it runs in the base case, no idea if correctly

14 changed files with 380 additions and 680 deletions
--- a/README.md
+++ b/README.md
@ -27,16 +27,14 @@ electromagnetic simulations on parallel compute hardware (mainly GPUs).
 * numpy
 * pyopencl
 * jinja2
-* [fdfd_tools](https://mpxd.net/code/jan/fdfd_tools)
-
-Optional (used for examples):
 * dill (for file output)
-* [gridlock](https://mpxd.net/code/jan/gridlock)
-* [masque](https://mpxd.net/code/jan/masque)
+* [gridlock](https://mpxd.net/gogs/jan/gridlock)
+* [masque](https://mpxd.net/gogs/jan/masque)
+* [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools)

 To get the code, just clone this repository:
 ```bash
-git clone https://mpxd.net/code/jan/opencl_fdtd.git
+git clone https://mpxd.net/gogs/jan/opencl_fdtd.git
 ```

 You can install the requirements and their dependencies easily with
--- a/fdtd.py
+++ b/fdtd.py
@ -6,13 +6,12 @@ See main() for simulation setup.

 import sys
 import time
-import logging

 import numpy
-import lzma
+import lzma 
 import dill

-from opencl_fdtd import Simulation
+from fdtd.simulation import Simulation
 from masque import Pattern, shapes
 import gridlock
 import pcgen
@ -21,9 +20,6 @@ import fdfd_tools

 __author__ = 'Jan Petykiewicz'

-logging.basicConfig(level=logging.DEBUG)
-logger = logging.getLogger(__name__)
-

 def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern:
    """
@ -124,13 +120,9 @@ def main():
                       eps=n_air**2,
                       polygons=mask.as_polygons())

-    logger.info('grid shape: {}'.format(grid.shape))
+    print('grid shape: {}'.format(grid.shape))
    # #### Create the simulation grid ####
-    pmls = [{'axis': a, 'polarity': p, 'thickness': pml_thickness}
-            for a in 'xyz' for p in 'np']
-    #bloch = [{'axis': a, 'real': 1, 'imag': 0} for a in 'x']
-    bloch = []
-    sim = Simulation(grid.grids, do_poynting=True, pmls=pmls, bloch_boundaries=bloch)
+    sim = Simulation(grid.grids, do_poynting=False, pmls=[])

    # Source parameters and function
    w = 2 * numpy.pi * dx / wl
@ -141,41 +133,31 @@ def main():
    def field_source(i):
        t0 = i * sim.dt - delay
        return numpy.sin(w * t0) * numpy.exp(-alpha * t0**2)
-
+   
    with open('sources.c', 'w') as f:
        f.write(sim.sources['E'])
-        f.write('\n====================H======================\n')
+        f.write('\n==========================================\n')
        f.write(sim.sources['H'])
        if sim.update_S:
-            f.write('\n=====================S=====================\n')
+            f.write('\n==========================================\n')
            f.write(sim.sources['S'])
-        if bloch:
-            f.write('\n=====================F=====================\n')
-            f.write(sim.sources['F'])
-            f.write('\n=====================G=====================\n')
-            f.write(sim.sources['G'])

    # #### Run a bunch of iterations ####
    # event = sim.whatever([prev_event]) indicates that sim.whatever should be queued
    #  immediately and run once prev_event is finished.
    start = time.perf_counter()
    for t in range(max_t):
-        e = sim.update_E([])
-        if bloch:
-            e = sim.update_F([e])
-        e.wait()
+        sim.update_E([]).wait()

        ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + numpy.prod(grid.shape)
-        sim.E[ind] += field_source(t)
+#        sim.buf[ind] += field_source(t)
        e = sim.update_H([])
-        if bloch:
-            e = sim.update_G([e])
        if sim.update_S:
            e = sim.update_S([e])
        e.wait()

        if t % 100 == 0:
-            logger.info('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start)))
+            print('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start)))
            sys.stdout.flush()

    with lzma.open('saved_simulation', 'wb') as f:
@ -190,6 +172,5 @@ def main():
            d['S'] = unvec(sim.S.get())
        dill.dump(d, f)

-
 if __name__ == '__main__':
    main()
--- a/fdtd/init.py
+++ b/fdtd/init.py
--- a/opencl_fdtd/kernels/common.cl
+++ b/opencl_fdtd/kernels/common.cl
@ -2,14 +2,10 @@
 /* Common code for E, H updates
 *
 * Template parameters:
- *  ftype	type name (e.g. float or double)
 *  shape       list of 3 ints specifying shape of fields
 */
 #}

-typedef {{ftype}} ftype;
-
-
 /*
 * Field size info
 */
@ -18,13 +14,6 @@ const size_t sy = {{shape[1]}};
 const size_t sz = {{shape[2]}};
 const size_t field_size = sx * sy * sz;

-//Since we use i to index into Ex[], Ey[], ... rather than E[], do nothing if
-// i is outside the bounds of Ex[].
-if (i >= field_size) {
-    PYOPENCL_ELWISE_CONTINUE;
-}
-
-
 /*
 * Array indexing
 */
@ -42,25 +31,6 @@ const size_t diy = sz;
 const size_t diz = 1;


-/*
- * Pointer math
- */
-//Pointer offsets into the components of a linearized vector-field
-// (eg. Hx = H + XX, where H and Hx are pointers)
-const size_t XX = 0;
-const size_t YY = field_size;
-const size_t ZZ = field_size * 2;
-
-//Define pointers to vector components of each field (eg. Hx = H + XX)
-__global ftype *Ex = E + XX;
-__global ftype *Ey = E + YY;
-__global ftype *Ez = E + ZZ;
-
-__global ftype *Hx = H + XX;
-__global ftype *Hy = H + YY;
-__global ftype *Hz = H + ZZ;
-
-
 /*
 * Implement periodic boundary conditions
 *
@ -73,9 +43,9 @@ __global ftype *Hz = H + ZZ;
 * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix .
 */
 {% for r in 'xyz' %}
-int m{{r}} = - (int) di{{r}};
-int p{{r}} = + (int) di{{r}};
-int wrap_{{r}} = (s{{r}} - 1) * (int) di{{r}};
+int m{{r}} = -1;
+int p{{r}} = +1;
+int wrap_{{r}} = s{{r}} - 1;
 if ( {{r}} == 0 ) {
  m{{r}} = wrap_{{r}};
 } else if ( {{r}} == s{{r}} - 1 ) {
--- a/fdtd/kernels/update_e.cl
+++ b/fdtd/kernels/update_e.cl
@ -0,0 +1,78 @@
+/*
+ *  Update E-field, including any PMLs.
+ *  
+ *  Template parameters:
+ *   common_header: Rendered contents of common.cl
+ *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
+ *   pml_thickness: Number of cells (integer)
+ *   
+ *  OpenCL args:
+ *   E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E]
+ */
+
+{{common_header}}
+
+////////////////////////////////////////////////////////////////////////////
+
+__global ftype *epsx = eps + XX;
+__global ftype *epsy = eps + YY;
+__global ftype *epsz = eps + ZZ;
+
+{% if pmls -%}
+const int pml_thickness = {{pml_thickness}};
+{%- endif %}
+
+/*
+ *   Precalclate derivatives
+ */
+ftype dHxy = Hx[i] - Hx[i + my];
+ftype dHxz = Hx[i] - Hx[i + mz];
+
+ftype dHyx = Hy[i] - Hy[i + mx];
+ftype dHyz = Hy[i] - Hy[i + mz];
+
+ftype dHzx = Hz[i] - Hz[i + mx];
+ftype dHzy = Hz[i] - Hz[i + my];
+
+/*
+ *   PML Update
+ */
+// PML effects on E
+ftype pExi = 0;
+ftype pEyi = 0;
+ftype pEzi = 0;
+
+{% for r, p in pmls -%}
+    {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
+    {%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%}
+    {%- if r != 'y' -%}
+        {%- set se, sh = '-', '+' -%}
+    {%- else -%}
+        {%- set se, sh = '+', '-' -%}
+    {%- endif -%}
+
+    {%- if p == 'n' %}
+
+if ( {{r}} < pml_thickness ) {
+    const size_t ir = {{r}};                          // index into pml parameters
+
+    {%- elif p == 'p' %}
+
+if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
+    const size_t ir = (s{{r}} - 1) - {{r}};                     // index into pml parameters
+
+    {%- endif %}
+    const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
+    {{psi ~ u}}[ip] = p0e{{p}}[ir] * {{psi ~ u}}[ip] + p1e{{p}}[ir] * dH{{v ~ r}};
+    {{psi ~ v}}[ip] = p0e{{p}}[ir] * {{psi ~ v}}[ip] + p1e{{p}}[ir] * dH{{u ~ r}};
+    pE{{u}}i {{se}}= {{psi ~ u}}[ip];
+    pE{{v}}i {{sh}}= {{psi ~ v}}[ip];
+}
+{%- endfor %}
+
+/*
+ *  Update E
+ */
+Ex[i] += dt / epsx[i] * (dHzy - dHyz + pExi);
+Ey[i] += dt / epsy[i] * (dHxz - dHzx + pEyi);
+Ez[i] += dt / epsz[i] * (dHyx - dHxy + pEzi);
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@ -1,58 +1,36 @@
 /*
 *  Update H-field, including any PMLs.
 *   Also precalculate values for poynting vector if necessary.
- *
+ *  
 *  Template parameters:
 *   common_header: Rendered contents of common.cl
- *   pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
- *      axes, polarities, and thicknesses.
- *   do_poynting: Whether to precalculate poynting vector components (boolean)
- *   uniform_dx: If grid is uniform, uniform_dx should be the grid spacing.
- *      Otherwise, uniform_dx should be False and [inv_de{xyz}] arrays must be supplied as
- *      OpenCL args.
+ *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
+ *   pml_thickness: Number of cells (integer)
+ *   do_poynting: Whether to precalculate poynting vector components (boolean)  
 *
 *  OpenCL args:
- *   E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS]
+ *   E, H, dt, [p{01}h{np}, Psi_{xyz}{np}_H], [oS]
 */

 {{common_header}}

 ////////////////////////////////////////////////////////////////////////////

+{% if pmls -%}
+const int pml_thickness = {{pml_thickness}};
+{%- endif %}
+
 /*
 *  Precalculate derivatives
 */
-{%- if uniform_dx %}
-ftype inv_dx = 1.0 / {{uniform_dx}};
-ftype inv_dy = 1.0 / {{uniform_dx}};
-ftype inv_dz = 1.0 / {{uniform_dx}};
-{%- else %}
-ftype inv_dx = inv_dex[x];
-ftype inv_dy = inv_dey[y];
-ftype inv_dz = inv_dez[z];
-{%- endif %}
+ftype dExy = Ex[i + py] - Ex[i];
+ftype dExz = Ex[i + pz] - Ex[i];

+ftype dEyx = Ey[i + px] - Ey[i];
+ftype dEyz = Ey[i + pz] - Ey[i];

-ftype dExy = (Ex[i + py] - Ex[i]) * inv_dy;
-ftype dExz = (Ex[i + pz] - Ex[i]) * inv_dz;
-
-ftype dEyx = (Ey[i + px] - Ey[i]) * inv_dx;
-ftype dEyz = (Ey[i + pz] - Ey[i]) * inv_dz;
-
-ftype dEzx = (Ez[i + px] - Ez[i]) * inv_dx;
-ftype dEzy = (Ez[i + py] - Ez[i]) * inv_dy;
-
-
-{% for bloch in bloch_boundaries -%}
-    {%- set r = bloch['axis'] -%}
-    {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
-if ({{r}} == s{{r}} - 1) {
-    ftype bloch_re = {{bloch['real']}};
-    ftype bloch_im = {{bloch['imag']}};
-    dE{{u ~ r}} = bloch_re * dE{{u ~ r}} + bloch_im * (F{{u}}[i + p{{u}}] - F{{u}}[i]);
-    dE{{v ~ r}} = bloch_re * dE{{v ~ r}} + bloch_im * (F{{v}}[i + p{{v}}] - F{{v}}[i]);
-}
-{% endfor -%}
+ftype dEzx = Ez[i + px] - Ez[i];
+ftype dEzy = Ez[i + py] - Ez[i];

 {%- if do_poynting %}

@ -71,8 +49,6 @@ ftype aEzy = Ez[i + py] + Ez[i];
 {%- endif %}


-
-
 /*
 *  PML Update
 */
@ -81,35 +57,29 @@ ftype pHxi = 0;
 ftype pHyi = 0;
 ftype pHzi = 0;

-{% for pml in pmls -%}
-    {%- set r = pml['axis'] -%}
-    {%- set p = pml['polarity'] -%}
+{%- for r, p in pmls -%}
    {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
    {%- set psi = 'Psi_' ~ r ~ p ~ '_H' -%}
    {%- if r != 'y' -%}
        {%- set se, sh = '-', '+' -%}
    {%- else -%}
        {%- set se, sh = '+', '-' -%}
-    {%- endif %}
-
-int pml_{{r ~ p}}_thickness = {{pml['thickness']}};
+    {%- endif -%}

    {%- if p == 'n' %}

-if ( {{r}} < pml_{{r ~ p}}_thickness ) {
+if ( {{r}} < pml_thickness ) {
    const size_t ir = {{r}};                          // index into pml parameters

    {%- elif p == 'p' %}

-if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
+if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
    const size_t ir = (s{{r}} - 1) - {{r}};                     // index into pml parameters

    {%- endif %}
    const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
-    dE{{v ~ r}} *= p{{r}}2h{{p}}[ir];
-    dE{{u ~ r}} *= p{{r}}2h{{p}}[ir];
-    {{psi ~ u}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1h{{p}}[ir] * dE{{v ~ r}};
-    {{psi ~ v}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1h{{p}}[ir] * dE{{u ~ r}};
+    {{psi ~ u}}[ip] = p0h{{p}}[ir] * {{psi ~ u}}[ip] + p1h{{p}}[ir] * dE{{v ~ r}};
+    {{psi ~ v}}[ip] = p0h{{p}}[ir] * {{psi ~ v}}[ip] + p1h{{p}}[ir] * dE{{u ~ r}};
    pH{{u}}i {{sh}}= {{psi ~ u}}[ip];
    pH{{v}}i {{se}}= {{psi ~ v}}[ip];
 }
@ -126,9 +96,9 @@ ftype Hz_old = Hz[i];
 {%- endif %}

 // H update equations
-Hx[i] -= dt * (dEzy - dEyz - pHxi);
-Hy[i] -= dt * (dExz - dEzx - pHyi);
-Hz[i] -= dt * (dEyx - dExy - pHzi);
+Hx[i] -= dt * (dEzy - dEyz + pHxi);
+Hy[i] -= dt * (dExz - dEzx + pHyi);
+Hz[i] -= dt * (dEyx - dExy + pHzi);

 {% if do_poynting -%}
 // Average H across timesteps
--- a/opencl_fdtd/kernels/update_s.cl
+++ b/opencl_fdtd/kernels/update_s.cl
@ -1,11 +1,11 @@
 /*
 *  Update E-field, including any PMLs.
- *
+ *  
 *  Template parameters:
 *   common_header: Rendered contents of common.cl
 *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
 *   pml_thickness: Number of cells (integer)
- *
+ *   
 *  OpenCL args:
 *   E, H, dt, S, oS
 */
@ -17,12 +17,12 @@

 /*
 * Calculate S from oS (pre-calculated components)
- */
+ */ 
 __global ftype *Sx = S + XX;
 __global ftype *Sy = S + YY;
 __global ftype *Sz = S + ZZ;

-// Use unscaled S components from H locations
+// Use unscaled S components from H locations 
 __global ftype *oSxy = oS + 0 * field_size;
 __global ftype *oSyz = oS + 1 * field_size;
 __global ftype *oSzx = oS + 2 * field_size;
--- a/fdtd/simulation.py
+++ b/fdtd/simulation.py
@ -0,0 +1,253 @@
+"""
+Class for constructing and holding the basic FDTD operations and fields
+"""
+
+from typing import List, Dict, Callable
+from collections import OrderedDict
+import numpy
+import jinja2
+import warnings
+
+import pyopencl
+import pyopencl.array
+from pyopencl.elementwise import ElementwiseKernel
+
+from fdfd_tools import vec
+
+
+__author__ = 'Jan Petykiewicz'
+
+float4 = pyopencl.array.vec.float4
+
+# Create jinja2 env on module load
+jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
+
+
+class Simulation(object):
+    """
+    Constructs and holds the basic FDTD operations and related fields
+    """
+    E = None    # type: List[pyopencl.array.Array]
+    H = None    # type: List[pyopencl.array.Array]
+    S = None    # type: List[pyopencl.array.Array]
+    eps = None  # type: List[pyopencl.array.Array]
+    dt = None   # type: float
+
+    arg_type = None     # type: numpy.float32 or numpy.float64
+
+    context = None      # type: pyopencl.Context
+    queue = None        # type: pyopencl.CommandQueue
+
+    update_E = None     # type: Callable[[],pyopencl.Event]
+    update_H = None     # type: Callable[[],pyopencl.Event]
+    update_S = None     # type: Callable[[],pyopencl.Event]
+    sources = None      # type: Dict[str, str]
+
+    def __init__(self,
+                 epsilon: List[numpy.ndarray],
+                 dt: float = .99/numpy.sqrt(3),
+                 initial_E: List[numpy.ndarray] = None,
+                 initial_H: List[numpy.ndarray] = None,
+                 context: pyopencl.Context = None,
+                 queue: pyopencl.CommandQueue = None,
+                 float_type: numpy.float32 or numpy.float64 = numpy.float32,
+                 pml_thickness: int = 10,
+                 pmls: List[List[str]] = None,
+                 do_poynting: bool = True):
+        """
+        Initialize the simulation.
+
+        :param epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray
+                spanning the simulation domain. Relative epsilon is used.
+        :param dt: Time step. Default is the Courant factor.
+        :param initial_E: Initial E-field (default is 0 everywhere). Same format as epsilon.
+        :param initial_H: Initial H-field (default is 0 everywhere). Same format as epsilon.
+        :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
+        :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
+        :param float_type: numpy.float32 or numpy.float64. Default numpy.float32.
+        """
+
+        if len(epsilon) != 3:
+            Exception('Epsilon must be a list with length of 3')
+        if not all((e.shape == epsilon[0].shape for e in epsilon[1:])):
+            Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
+
+        if context is None:
+            self.context = pyopencl.create_some_context()
+        else:
+            self.context = context
+
+        if queue is None:
+            self.queue = pyopencl.CommandQueue(self.context)
+        else:
+            self.queue = queue
+
+        if dt > .99/numpy.sqrt(3):
+            warnings.warn('Warning: unstable dt: {}'.format(dt))
+        elif dt <= 0:
+            raise Exception('Invalid dt: {}'.format(dt))
+        else:
+            self.dt = dt
+
+        def make4d(f):
+            g = [numpy.transpose(fi)[:,:,:,None] for fi in f]
+            h = g + [numpy.empty_like(g[0])]
+            j = numpy.concatenate(h, axis=3)
+            return numpy.ascontiguousarray(j, dtype=numpy.float32) 
+
+        self.arg_type = float_type
+        self.sources = {}
+        ef = make4d(epsilon).astype(float_type)
+        self.eps = pyopencl.image_from_array(self.context,
+                make4d(epsilon), num_channels=4, mode='r')
+
+        self.buf = pyopencl.array.Array(self.queue, shape=epsilon.shape, dtype=float4)
+
+        if initial_E is None:
+            self.E = pyopencl.image_from_array(self.context,
+                make4d(epsilon) * 0, num_channels=4, mode='r')
+        else:
+            if len(initial_E) != 3:
+                Exception('Initial_E must be a list of length 3')
+            if not all((E.shape == epsilon[0].shape for E in initial_E)):
+                Exception('Initial_E list elements must have same shape as epsilon elements')
+            self.E = pyopencl.array.to_device(self.queue, vec(E).astype(float_type))
+
+        if initial_H is None:
+            self.H = pyopencl.image_from_array(self.context,
+                make4d(epsilon) * 0, num_channels=4, mode='r')
+        else:
+            if len(initial_H) != 3:
+                Exception('Initial_H must be a list of length 3')
+            if not all((H.shape == epsilon[0].shape for H in initial_H)):
+                Exception('Initial_H list elements must have same shape as epsilon elements')
+            self.H = pyopencl.array.to_device(self.queue, vec(H).astype(float_type))
+
+        if pmls is None:
+            pmls = [[d, p] for d in 'xyz' for p in 'np']
+
+        ctype = type_to_C(self.arg_type)
+
+        def ptr(arg: str) -> str:
+            return ctype + ' *' + arg
+
+        base_fields = OrderedDict()
+
+        common_source = jinja_env.get_template('common.cl').render(
+                shape=epsilon[0].shape,
+                )
+        jinja_args = {
+                'common_header': common_source,
+                'pml_thickness': pml_thickness,
+                'pmls': pmls,
+                'do_poynting': do_poynting,
+                }
+        E_source = jinja_env.get_template('update_e_full.cl').render(**jinja_args)
+        H_source = jinja_env.get_template('update_h_full.cl').render(**jinja_args)
+
+        self.sources['E'] = E_source
+        self.sources['H'] = H_source
+
+        if do_poynting:
+            S_source = jinja_env.get_template('update_s.cl').render(**jinja_args)
+            self.sources['S'] = S_source
+
+            self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=float_type)
+            self.S = pyopencl.array.zeros_like(self.E)
+            S_fields = OrderedDict()
+            S_fields[ptr('oS')] = self.oS
+            S_fields[ptr('S')] = self.S
+        else:
+            S_fields = OrderedDict() 
+
+        '''
+        PML
+        '''
+        m = (3.5, 1)
+        sigma_max = 0.8 * (m[0] + 1) / numpy.sqrt(1.0) # TODO: epsilon_eff (not 1.0)
+        alpha_max = 0  # TODO: Decide what to do about non-zero alpha
+
+        def par(x):
+            sigma = ((x / pml_thickness) ** m[0]) * sigma_max
+            alpha = ((1 - x / pml_thickness) ** m[1]) * alpha_max
+            p0 = numpy.exp(-(sigma + alpha) * dt)
+            p1 = sigma / (sigma + alpha) * (p0 - 1)
+            return p0, p1
+        
+        xen, xep, xhn, xhp = (numpy.arange(1, pml_thickness + 1, dtype=float_type)[::-1] for _ in range(4))
+        xep -= 0.5
+        xhn -= 0.5
+
+        pml_p_names = [['p' + a + eh + np for np in 'np' for a in '01'] for eh in 'eh']
+        pml_e_fields = OrderedDict()
+        pml_h_fields = OrderedDict()
+        for ne, nh, pe, ph in zip(*pml_p_names, par(xen) + par(xep), par(xhn) + par(xhp)):
+            pml_e_fields[ptr(ne)] = pyopencl.array.to_device(self.queue, pe)
+            pml_h_fields[ptr(nh)] = pyopencl.array.to_device(self.queue, ph)
+
+        for pml in pmls:
+            uv = 'xyz'.replace(pml[0], '')
+            psi_base = 'Psi_' + ''.join(pml) + '_'
+            psi_names = [[psi_base + eh + c for c in uv] for eh in 'EH']
+
+            psi_shape = list(epsilon[0].shape)
+            psi_shape['xyz'.find(pml[0])] = pml_thickness
+
+            for ne, nh in zip(*psi_names):
+                pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
+                pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
+        
+        self.pml_e_fields = pml_e_fields
+        self.pml_h_fields = pml_h_fields
+
+
+        '''
+        Create operations
+        '''
+        E_update = pyopencl.Program(self.context, E_source).build().update_e
+        H_update = pyopencl.Program(self.context, H_source).build().update_h
+        max_gs = E_update.get_work_group_info(pyopencl.kernel_work_group_info.WORK_GROUP_SIZE,
+                                      self.queue.device)
+        gs, ls = self.buf.get_sizes(self.queue, max_gs)
+        print('gs', gs, ls, max_gs)
+        def update_E(e):
+            e = pyopencl.enqueue_copy(self.queue, self.buf.data, self.E, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=e)
+            e = E_update(self.queue, gs, ls, self.buf.data, self.H, numpy.float32(dt), self.eps, numpy.uint32(self.buf.size), wait_for=[e])
+            return e
+
+        def update_H(e):
+            e = pyopencl.enqueue_copy(self.queue, self.E, self.buf.data, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=e)
+            e = pyopencl.enqueue_copy(self.queue, self.buf.data, self.H, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=[e])
+            e = H_update(self.queue, gs, ls, self.E, self.buf.data, numpy.float32(dt), numpy.uint32(self.buf.size), wait_for=[e])
+            e = pyopencl.enqueue_copy(self.queue, self.H, self.buf.data, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=[e])
+            return e
+
+        self.update_E = update_E
+        self.update_H = update_H
+
+        if do_poynting:
+            S_args = OrderedDict()
+            [S_args.update(d) for d in (base_fields, S_fields)]
+            S_update = ElementwiseKernel(self.context, operation=S_source,
+                                         arguments=', '.join(S_args.keys()))
+
+            self.update_S = lambda e: S_update(*S_args.values(), wait_for=e)
+
+
+def type_to_C(float_type) -> str:
+    """
+    Returns a string corresponding to the C equivalent of a numpy type.
+    Only works for float16, float32, float64.
+
+    :param float_type: e.g. numpy.float32
+    :return: string containing the corresponding C type (eg. 'double')
+    """
+    if float_type == numpy.float16:
+        arg_type = 'half'
+    elif float_type == numpy.float32:
+        arg_type = 'float'
+    elif float_type == numpy.float64:
+        arg_type = 'double'
+    else:
+        raise Exception('Unsupported type')
+    return arg_type
--- a/opencl_fdtd/init.py
+++ b/opencl_fdtd/init.py
@ -1,5 +0,0 @@
-from .simulation import Simulation, type_to_C
-
-__author__ = 'Jan Petykiewicz'
-
-version = '0.4'
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@ -1,107 +0,0 @@
-/*
- *  Update E-field, including any PMLs.
- *
- *  Template parameters:
- *   common_header: Rendered contents of common.cl
- *   pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
- *      axes, polarities, and thicknesses.
- *   uniform_dx: If grid is uniform, uniform_dx should be the grid spacing.
- *      Otherwise, uniform_dx should be False and [inv_dh{xyz}] arrays must be supplied as
- *      OpenCL args.
- *
- *  OpenCL args:
- *   E, H, dt, eps, [p{012}e{np}, Psi_{xyz}{np}_E], [inv_dh{xyz}]
- */
-
-{{common_header}}
-
-////////////////////////////////////////////////////////////////////////////
-
-__global ftype *epsx = eps + XX;
-__global ftype *epsy = eps + YY;
-__global ftype *epsz = eps + ZZ;
-
-
-{%- if uniform_dx %}
-ftype inv_dx = 1.0 / {{uniform_dx}};
-ftype inv_dy = 1.0 / {{uniform_dx}};
-ftype inv_dz = 1.0 / {{uniform_dx}};
-{%- else %}
-ftype inv_dx = inv_dhx[x];
-ftype inv_dy = inv_dhy[y];
-ftype inv_dz = inv_dhz[z];
-{%- endif %}
-
-
-/*
- *   Precalculate derivatives
- */
-ftype dHxy = (Hx[i] - Hx[i + my]) * inv_dy;
-ftype dHxz = (Hx[i] - Hx[i + mz]) * inv_dz;
-
-ftype dHyx = (Hy[i] - Hy[i + mx]) * inv_dx;
-ftype dHyz = (Hy[i] - Hy[i + mz]) * inv_dz;
-
-ftype dHzx = (Hz[i] - Hz[i + mx]) * inv_dx;
-ftype dHzy = (Hz[i] - Hz[i + my]) * inv_dy;
-
-{% for bloch in bloch_boundaries -%}
-    {%- set r = bloch['axis'] -%}
-    {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
-if ({{r}} == 0) {
-    ftype bloch_im = {{bloch['real']}};
-    ftype bloch_re = {{bloch['imag']}};
-    dH{{u ~ r}} = bloch_re * dH{{v ~ r}} + bloch_im * (G{{u}}[i] - G{{u}}[i + m{{u}}]);
-    dH{{v ~ r}} = bloch_re * dH{{v ~ r}} + bloch_im * (G{{v}}[i] - G{{v}}[i + m{{v}}]);
-}
-{%- endfor %}
-
-
-/*
- *   PML Update
- */
-// PML effects on E
-ftype pExi = 0;
-ftype pEyi = 0;
-ftype pEzi = 0;
-
-{% for pml in pmls -%}
-    {%- set r = pml['axis'] -%}
-    {%- set p = pml['polarity'] -%}
-    {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
-    {%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%}
-    {%- if r != 'y' -%}
-        {%- set se, sh = '-', '+' -%}
-    {%- else -%}
-        {%- set se, sh = '+', '-' -%}
-    {%- endif -%}
-
-int pml_{{r ~ p}}_thickness = {{pml['thickness']}};
-
-    {%- if p == 'n' %}
-
-if ( {{r}} < pml_{{r ~ p}}_thickness ) {
-    const size_t ir = {{r}};                          // index into pml parameters
-
-    {%- elif p == 'p' %}
-
-if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
-    const size_t ir = (s{{r}} - 1) - {{r}};                     // index into pml parameters
-
-    {%- endif %}
-    const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
-    dH{{v ~ r}} *= p{{r}}2e{{p}}[ir];
-    dH{{u ~ r}} *= p{{r}}2e{{p}}[ir];
-    {{psi ~ u}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1e{{p}}[ir] * dH{{v ~ r}};
-    {{psi ~ v}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1e{{p}}[ir] * dH{{u ~ r}};
-    pE{{u}}i {{se}}= {{psi ~ u}}[ip];
-    pE{{v}}i {{sh}}= {{psi ~ v}}[ip];
-}
-{%- endfor %}
-
-/*
- *  Update E
- */
-Ex[i] += dt / epsx[i] * (dHzy - dHyz + pExi);
-Ey[i] += dt / epsy[i] * (dHxz - dHzx + pEyi);
-Ez[i] += dt / epsz[i] * (dHyx - dHxy + pEzi);
--- a/opencl_fdtd/kernels/update_j.cl
+++ b/opencl_fdtd/kernels/update_j.cl
@ -1,32 +0,0 @@
-/*
- *  Update E-field from J field 
- *
- *  Template parameters:
- *   common_header: Rendered contents of common.cl
- *
- *  OpenCL args:
- *   E, Jr, Ji, c, s, xmin, xmax, ymin, ymax, zmin, zmax   
- */
-
-{{common_header}}
-
-////////////////////////////////////////////////////////////////////////////
-
-__global ftype *Jrx = Jr + XX;
-__global ftype *Jry = Jr + YY;
-__global ftype *Jrz = Jr + ZZ;
-__global ftype *Jix = Ji + XX;
-__global ftype *Jiy = Ji + YY;
-__global ftype *Jiz = Ji + ZZ;
-
-
-if (x < xmin || y < ymin || z < zmin) {
-   PYOPENCL_ELWISE_CONTINUE;
-}
-if (x >= xmax || y >= ymax || z >= zmax) {
-   PYOPENCL_ELWISE_CONTINUE;
-}
-
-Ex[i] += c * Jrx[i] + s * Jix[i];
-Ey[i] += c * Jry[i] + s * Jiy[i];
-Ez[i] += c * Jrz[i] + s * Jiz[i];
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@ -1,376 +0,0 @@
-"""
-Class for constructing and holding the basic FDTD operations and fields
-"""
-
-from typing import List, Dict, Callable
-from collections import OrderedDict
-import numpy
-import jinja2
-import warnings
-
-import pyopencl
-import pyopencl.array
-from pyopencl.elementwise import ElementwiseKernel
-
-from fdfd_tools import vec
-
-
-__author__ = 'Jan Petykiewicz'
-
-
-# Create jinja2 env on module load
-jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
-
-
-class Simulation(object):
-    """
-    Constructs and holds the basic FDTD operations and related fields
-
-    After constructing this object, call the (update_E, update_H, update_S) members
-     to perform FDTD updates on the stored (E, H, S) fields:
-
-        pmls = [{'axis': a, 'polarity': p} for a in 'xyz' for p in 'np']
-        sim = Simulation(grid.grids, do_poynting=True, pmls=pmls)
-        with open('sources.c', 'w') as f:
-            f.write('{}'.format(sim.sources))
-
-        for t in range(max_t):
-            sim.update_E([]).wait()
-
-            # Find the linear index for the center point, for Ey
-            ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + \
-                    numpy.prod(grid.shape) * 1
-            # Perturb the field (i.e., add a soft current source)
-            sim.E[ind] += numpy.sin(omega * t * sim.dt)
-            event = sim.update_H([])
-            if sim.update_S:
-                event = sim.update_S([event])
-            event.wait()
-
-            with lzma.open('saved_simulation', 'wb') as f:
-                dill.dump(fdfd_tools.unvec(sim.E.get(), grid.shape), f)
-
-    Code in the form
-        event2 = sim.update_H([event0, event1])
-     indicates that the update_H operation should be prepared immediately, but wait for
-     event0 and event1 to occur (i.e. previous operations to finish) before starting execution.
-     event2 can then be used to prepare further operations to be run after update_H.
-    """
-    E = None            # type: pyopencl.array.Array
-    H = None            # type: pyopencl.array.Array
-    S = None            # type: pyopencl.array.Array
-    eps = None          # type: pyopencl.array.Array
-    dt = None           # type: float
-    inv_dxes = None     # type: List[pyopencl.array.Array]
-
-    arg_type = None     # type: numpy.float32 or numpy.float64
-
-    context = None      # type: pyopencl.Context
-    queue = None        # type: pyopencl.CommandQueue
-
-    update_E = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
-    update_H = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
-    update_S = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
-    update_J = None     # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
-    sources = None      # type: Dict[str, str]
-
-    def __init__(self,
-                 epsilon: List[numpy.ndarray],
-                 pmls: List[Dict[str, int or float]],
-                 bloch_boundaries: List[Dict[str, int or float]] = (),
-                 dxes: List[List[numpy.ndarray]] or float = None,
-                 dt: float = None,
-                 initial_fields: Dict[str, List[numpy.ndarray]] = None,
-                 context: pyopencl.Context = None,
-                 queue: pyopencl.CommandQueue = None,
-                 float_type: numpy.float32 or numpy.float64 = numpy.float32,
-                 do_poynting: bool = True,
-                 do_fieldsrc: bool = False):
-        """
-        Initialize the simulation.
-
-        :param epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray
-                spanning the simulation domain. Relative epsilon is used.
-        :param pmls: List of dicts with keys:
-            'axis': One of 'x', 'y', 'z'.
-            'direction': One of 'n', 'p'.
-            'thickness': Number of layers, default 8.
-            'epsilon_eff': Effective epsilon to match to. Default 1.0.
-            'mu_eff': Effective mu to match to. Default 1.0.
-            'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6.
-            'm': Polynomial grading exponent. Default 3.5.
-            'ma': Exponent for alpha. Default 1.
-        :param bloch_boundaries: List of dicts with keys:
-            'axis': One of 'x', 'y', 'z'.
-            'real': Real part of bloch phase factor (i.e. real(exp(i * phase)))
-            'imag': Imaginary part of bloch phase factor (i.e. imag(exp(i * phase)))
-        :param dt: Time step. Default is min(dxes) * .99/sqrt(3).
-        :param initial_fields: Dict with optional keys ('E', 'H', 'F', 'G') containing initial values for the
-            specified fields (default is 0 everywhere). Fields have same format as epsilon.
-        :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
-        :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
-        :param float_type: numpy.float32 or numpy.float64. Default numpy.float32.
-        :param do_poynting: If true, enables calculation of the poynting vector, S.
-                Poynting vector calculation adds the following computational burdens:
-                    * During update_H, ~6 extra additions/cell are performed in order to spatially
-                        average E and temporally average H. These quantities are multiplied
-                        (6 multiplications/cell) and then stored (6 writes/cell, cache-friendly).
-                    * update_S performs a discrete cross product using the precalculated products
-                        from update_H. This is not nice to the cache and similar to e.g. update_E
-                        in complexity.
-                    * GPU memory requirements are approximately doubled, since S and the intermediate
-                        products must be stored.
-        """
-        if initial_fields is None:
-            initial_fields = {}
-
-        self.shape = epsilon[0].shape
-        self.arg_type = float_type
-        self.sources = {}
-        self._create_context(context, queue)
-        self._create_eps(epsilon)
-
-        if dxes is None:
-            dxes = 1.0
-
-        if isinstance(dxes, (float, int)):
-            uniform_dx = dxes
-            min_dx = dxes
-        else:
-            uniform_dx = False
-            self.inv_dxes = [self._create_field(1 / dxn) for dxn in dxes[0] + dxes[1]]
-            min_dx = min(min(dxn) for dxn in dxes[0] + dxes[1])
-
-        max_dt = min_dx * .99 / numpy.sqrt(3)
-
-        if dt is None:
-            self.dt = max_dt
-        elif dt > max_dt:
-            warnings.warn('Warning: unstable dt: {}'.format(dt))
-        elif dt <= 0:
-            raise Exception('Invalid dt: {}'.format(dt))
-        else:
-            self.dt = dt
-
-        self.E = self._create_field(initial_fields.get('E', None))
-        self.H = self._create_field(initial_fields.get('H', None))
-        if bloch_boundaries:
-            self.F = self._create_field(initial_fields.get('F', None))
-            self.G = self._create_field(initial_fields.get('G', None))
-
-        for pml in pmls:
-            pml.setdefault('thickness', 8)
-            pml.setdefault('epsilon_eff', 1.0)
-            pml.setdefault('mu_eff', 1.0)
-            pml.setdefault('ln_R_per_layer', -1.6)
-            pml.setdefault('m', 3.5)
-            pml.setdefault('cfs_alpha', 0)
-            pml.setdefault('ma', 1)
-
-        ctype = type_to_C(self.arg_type)
-
-        def ptr(arg: str) -> str:
-            return ctype + ' * restrict ' + arg
-
-        base_fields = OrderedDict()
-        base_fields[ptr('E')] = self.E
-        base_fields[ptr('H')] = self.H
-        base_fields[ctype + ' dt'] = self.dt
-        if uniform_dx == False:
-            inv_dx_names = ['inv_d' + eh + r for eh in 'eh' for r in 'xyz']
-            for name, field in zip(inv_dx_names, self.inv_dxes):
-                base_fields[ptr(name)] = field
-
-        eps_field = OrderedDict()
-        eps_field[ptr('eps')] = self.eps
-
-        if bloch_boundaries:
-            base_fields[ptr('F')] = self.F
-            base_fields[ptr('G')] = self.G
-
-            bloch_fields = OrderedDict()
-            bloch_fields[ptr('E')] = self.F
-            bloch_fields[ptr('H')] = self.G
-            bloch_fields[ctype + ' dt'] = self.dt
-            bloch_fields[ptr('F')] = self.E
-            bloch_fields[ptr('G')] = self.H
-
-        common_source = jinja_env.get_template('common.cl').render(
-                ftype=ctype,
-                shape=self.shape,
-                )
-        jinja_args = {
-                'common_header': common_source,
-                'pmls': pmls,
-                'do_poynting': do_poynting,
-                'bloch': bloch_boundaries,
-                'uniform_dx': uniform_dx,
-                }
-        E_source = jinja_env.get_template('update_e.cl').render(**jinja_args)
-        H_source = jinja_env.get_template('update_h.cl').render(**jinja_args)
-        self.sources['E'] = E_source
-        self.sources['H'] = H_source
-
-        if bloch_boundaries:
-            bloch_args = jinja_args.copy()
-            bloch_args['do_poynting'] = False
-            bloch_args['bloch'] = [{'axis': b['axis'],
-                                    'real': b['imag'],
-                                    'imag': b['real']}
-                                   for b in bloch_boundaries]
-            F_source = jinja_env.get_template('update_e.cl').render(**bloch_args)
-            G_source = jinja_env.get_template('update_h.cl').render(**bloch_args)
-            self.sources['F'] = F_source
-            self.sources['G'] = G_source
-
-
-        S_fields = OrderedDict()
-        if do_poynting:
-            S_source = jinja_env.get_template('update_s.cl').render(**jinja_args)
-            self.sources['S'] = S_source
-
-            self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=self.arg_type)
-            self.S = pyopencl.array.zeros_like(self.E)
-            S_fields[ptr('oS')] = self.oS
-            S_fields[ptr('S')] = self.S
-
-        J_fields = OrderedDict()
-        if do_fieldsrc:
-            J_source = jinja_env.get_template('update_j.cl').render(**jinja_args)
-            self.sources['J'] = J_source
-
-            self.Ji = pyopencl.array.zeros_like(self.E)
-            self.Jr = pyopencl.array.zeros_like(self.E)
-            J_fields[ptr('Jr')] = self.Jr
-            J_fields[ptr('Ji')] = self.Ji
-
-
-        '''
-        PML
-        '''
-        pml_e_fields, pml_h_fields = self._create_pmls(pmls)
-        if bloch_boundaries:
-            pml_f_fields, pml_g_fields = self._create_pmls(pmls)
-
-        '''
-        Create operations
-        '''
-        self.update_E = self._create_operation(E_source, (base_fields, eps_field, pml_e_fields))
-        self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields))
-        if do_poynting:
-            self.update_S = self._create_operation(S_source, (base_fields, S_fields))
-        if bloch_boundaries:
-            self.update_F = self._create_operation(F_source, (bloch_fields, eps_field, pml_f_fields))
-            self.update_G = self._create_operation(G_source, (bloch_fields, pml_g_fields))
-        if do_fieldsrc:
-            args = OrderedDict()
-            [args.update(d) for d in (base_fields, J_fields)]
-            var_args = [ctype + ' ' + v for v in 'cs'] + ['uint ' + r + m for r in 'xyz' for m in ('min', 'max')]
-            update = ElementwiseKernel(self.context, operation=J_source,
-                                       arguments=', '.join(list(args.keys()) + var_args))
-            self.update_J = lambda e, *a: update(*args.values(), *a, wait_for=e)
-
-
-    def _create_pmls(self, pmls):
-        ctype = type_to_C(self.arg_type)
-        def ptr(arg: str) -> str:
-            return ctype + ' *' + arg
-
-        pml_e_fields = OrderedDict()
-        pml_h_fields = OrderedDict()
-        for pml in pmls:
-            a = 'xyz'.find(pml['axis'])
-
-            sigma_max = -pml['ln_R_per_layer'] / 2 * (pml['m'] + 1)
-            kappa_max = numpy.sqrt(pml['mu_eff'] * pml['epsilon_eff'])
-            alpha_max = pml['cfs_alpha']
-
-            def par(x):
-                scaling = ((x / (pml['thickness'])) ** pml['m'])
-                sigma = scaling * sigma_max
-                kappa = 1 + scaling * (kappa_max - 1)
-                alpha = ((1 - x / pml['thickness']) ** pml['ma']) * alpha_max
-                p0 = numpy.exp(-(sigma / kappa + alpha) * self.dt)
-                p1 = sigma / (sigma + kappa * alpha) * (p0 - 1)
-                p2 = 1/kappa
-                return p0, p1, p2
-
-            xe, xh = (numpy.arange(1, pml['thickness'] + 1, dtype=self.arg_type)[::-1] for _ in range(2))
-            if pml['polarity'] == 'p':
-                xe -= 0.5
-            elif pml['polarity'] == 'n':
-                xh -= 0.5
-
-            pml_p_names = [['p' + pml['axis'] + i + eh + pml['polarity'] for i in '012'] for eh in 'eh']
-            for name_e, name_h, pe, ph in zip(pml_p_names[0], pml_p_names[1], par(xe), par(xh)):
-                pml_e_fields[ptr(name_e)] = pyopencl.array.to_device(self.queue, pe)
-                pml_h_fields[ptr(name_h)] = pyopencl.array.to_device(self.queue, ph)
-
-            uv = 'xyz'.replace(pml['axis'], '')
-            psi_base = 'Psi_' + pml['axis'] + pml['polarity'] + '_'
-            psi_names = [[psi_base + eh + c for c in uv] for eh in 'EH']
-
-            psi_shape = list(self.shape)
-            psi_shape[a] = pml['thickness']
-
-            for ne, nh in zip(*psi_names):
-                pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
-                pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
-        return pml_e_fields, pml_h_fields
-
-    def _create_operation(self, source, args_fields):
-        args = OrderedDict()
-        [args.update(d) for d in args_fields]
-        update = ElementwiseKernel(self.context, operation=source,
-                                   arguments=', '.join(args.keys()))
-        return lambda e: update(*args.values(), wait_for=e)
-
-    def _create_context(self, context: pyopencl.Context = None,
-                        queue: pyopencl.CommandQueue = None):
-        if context is None:
-            self.context = pyopencl.create_some_context()
-        else:
-            self.context = context
-
-        if queue is None:
-            self.queue = pyopencl.CommandQueue(self.context)
-        else:
-            self.queue = queue
-
-    def _create_eps(self, epsilon: List[numpy.ndarray]):
-        if len(epsilon) != 3:
-            raise Exception('Epsilon must be a list with length of 3')
-        if not all((e.shape == epsilon[0].shape for e in epsilon[1:])):
-            raise Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
-        if not epsilon[0].shape == self.shape:
-            raise Exception('Epsilon shape mismatch. Expected {}, got {}'.format(self.shape, epsilon[0].shape))
-        self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(self.arg_type))
-
-    def _create_field(self, initial_value: List[numpy.ndarray] = None):
-        if initial_value is None:
-            return pyopencl.array.zeros_like(self.eps)
-        else:
-            if len(initial_value) != 3:
-                Exception('Initial field value must be a list of length 3')
-            if not all((f.shape == self.shape for f in initial_value)):
-                Exception('Initial field list elements must have same shape as epsilon elements')
-            return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type))
-
-
-def type_to_C(float_type) -> str:
-    """
-    Returns a string corresponding to the C equivalent of a numpy type.
-    Only works for float16, float32, float64.
-
-    :param float_type: e.g. numpy.float32
-    :return: string containing the corresponding C type (eg. 'double')
-    """
-    if float_type == numpy.float16:
-        arg_type = 'half'
-    elif float_type == numpy.float32:
-        arg_type = 'float'
-    elif float_type == numpy.float64:
-        arg_type = 'double'
-    else:
-        raise Exception('Unsupported type')
-    return arg_type
--- a/requirements.txt
+++ b/requirements.txt
@ -2,7 +2,7 @@ numpy
 h5py
 pyopencl
 jinja2
-e git+https://mpxd.net/code/jan/float_raster.git@release#egg=float_raster
-e git+https://mpxd.net/code/jan/gridlock.git@release#egg=gridlock
-e git+https://mpxd.net/code/jan/masque.git@release#egg=masque
-e git+https://mpxd.net/code/jan/fdfd_tools.git@master#egg=fdfd_tools
+-e git+https://mpxd.net/gogs/jan/float_raster.git@release#egg=float_raster
+-e git+https://mpxd.net/gogs/jan/gridlock.git@release#egg=gridlock
+-e git+https://mpxd.net/gogs/jan/masque.git@release#egg=masque
+-e git+https://mpxd.net/gogs/jan/fdfd_tools.git@master#egg=fdfd_tools
--- a/setup.py
+++ b/setup.py
@ -1,30 +0,0 @@
-#!/usr/bin/env python3
-
-from setuptools import setup, find_packages
-import opencl_fdtd
-
-with open('README.md', 'r') as f:
-    long_description = f.read()
-
-setup(name='opencl_fdtd',
-      version=opencl_fdtd.version,
-      description='OpenCL FDTD solver',
-      long_description=long_description,
-      long_description_content_type='text/markdown',
-      author='Jan Petykiewicz',
-      author_email='anewusername@gmail.com',
-      url='https://mpxd.net/code/jan/opencl_fdtd',
-      packages=find_packages(),
-      package_data={
-          'opencl_fdfd': ['kernels/*']
-      },
-      install_requires=[
-            'numpy',
-            'pyopencl',
-            'jinja2',
-            'fdfd_tools>=0.3',
-      ],
-      extras_require={
-      },
-      )
-