rename lib

2017-08-14 15:41:20 -07:00 · 2017-08-14 15:41:20 -07:00 · 0d91f0d43e
commit 0d91f0d43e
parent c1750e9fde
8 changed files with 8 additions and 6 deletions
--- a/opencl_fdtd/init.py
+++ b/opencl_fdtd/init.py
@ -0,0 +1 @@
+from .simulation import Simulation, type_to_C
--- a/opencl_fdtd/kernels/common.cl
+++ b/opencl_fdtd/kernels/common.cl
@ -0,0 +1,84 @@
+{#
+/* Common code for E, H updates
+ *
+ * Template parameters:
+ *  ftype	type name (e.g. float or double)
+ *  shape       list of 3 ints specifying shape of fields
+ */
+#}
+
+typedef {{ftype}} ftype;
+
+
+/*
+ * Field size info
+ */
+const size_t sx = {{shape[0]}};
+const size_t sy = {{shape[1]}};
+const size_t sz = {{shape[2]}};
+const size_t field_size = sx * sy * sz;
+
+//Since we use i to index into Ex[], Ey[], ... rather than E[], do nothing if
+// i is outside the bounds of Ex[].
+if (i >= field_size) {
+    PYOPENCL_ELWISE_CONTINUE;
+}
+
+
+/*
+ * Array indexing
+ */
+// Given a linear index i and shape (sx, sy, sz), defines x, y, and z
+//  as the 3D indices of the current element (i).
+// (ie, converts linear index [i] to field indices (x, y, z)
+const size_t x = i / (sz * sy);
+const size_t y = (i - x * sz * sy) / sz;
+const size_t z = (i - y * sz - x * sz * sy);
+
+// Calculate linear index offsets corresponding to offsets in 3D
+// (ie, if E[i] <-> E(x, y, z), then E[i + diy] <-> E(x, y + 1, z)
+const size_t dix = sz * sy;
+const size_t diy = sz;
+const size_t diz = 1;
+
+
+/*
+ * Pointer math
+ */
+//Pointer offsets into the components of a linearized vector-field
+// (eg. Hx = H + XX, where H and Hx are pointers)
+const size_t XX = 0;
+const size_t YY = field_size;
+const size_t ZZ = field_size * 2;
+
+//Define pointers to vector components of each field (eg. Hx = H + XX)
+__global ftype *Ex = E + XX;
+__global ftype *Ey = E + YY;
+__global ftype *Ez = E + ZZ;
+
+__global ftype *Hx = H + XX;
+__global ftype *Hy = H + YY;
+__global ftype *Hz = H + ZZ;
+
+
+/*
+ * Implement periodic boundary conditions
+ *
+ * mx ([m]inus [x]) gives the index offset of the adjacent cell in the minus-x direction.
+ * In the event that we start at x == 0, we actually want to wrap around and grab the cell
+ * x_{-1} == (sx - 1) instead, ie. mx = (sx - 1) * dix .
+ *
+ * px ([p]lus [x]) gives the index offset of the adjacent cell in the plus-x direction.
+ * In the event that we start at x == (sx - 1), we actually want to wrap around and grab
+ * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix .
+ */
+{% for r in 'xyz' %}
+int m{{r}} = -di{{r}};
+int p{{r}} = +di{{r}};
+int wrap_{{r}} = (s{{r}} - 1) * di{{r}};
+if ( {{r}} == 0 ) {
+  m{{r}} = wrap_{{r}};
+} else if ( {{r}} == s{{r}} - 1 ) {
+  p{{r}} = -wrap_{{r}};
+} 
+{% endfor %}
--- a/opencl_fdtd/kernels/update_e.cl
+++ b/opencl_fdtd/kernels/update_e.cl
@ -0,0 +1,78 @@
+/*
+ *  Update E-field, including any PMLs.
+ *  
+ *  Template parameters:
+ *   common_header: Rendered contents of common.cl
+ *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
+ *   pml_thickness: Number of cells (integer)
+ *   
+ *  OpenCL args:
+ *   E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E]
+ */
+
+{{common_header}}
+
+////////////////////////////////////////////////////////////////////////////
+
+__global ftype *epsx = eps + XX;
+__global ftype *epsy = eps + YY;
+__global ftype *epsz = eps + ZZ;
+
+{% if pmls -%}
+const int pml_thickness = {{pml_thickness}};
+{%- endif %}
+
+/*
+ *   Precalclate derivatives
+ */
+ftype dHxy = Hx[i] - Hx[i + my];
+ftype dHxz = Hx[i] - Hx[i + mz];
+
+ftype dHyx = Hy[i] - Hy[i + mx];
+ftype dHyz = Hy[i] - Hy[i + mz];
+
+ftype dHzx = Hz[i] - Hz[i + mx];
+ftype dHzy = Hz[i] - Hz[i + my];
+
+/*
+ *   PML Update
+ */
+// PML effects on E
+ftype pExi = 0;
+ftype pEyi = 0;
+ftype pEzi = 0;
+
+{% for r, p in pmls -%}
+    {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
+    {%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%}
+    {%- if r != 'y' -%}
+        {%- set se, sh = '-', '+' -%}
+    {%- else -%}
+        {%- set se, sh = '+', '-' -%}
+    {%- endif -%}
+
+    {%- if p == 'n' %}
+
+if ( {{r}} < pml_thickness ) {
+    const size_t ir = {{r}};                          // index into pml parameters
+
+    {%- elif p == 'p' %}
+
+if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
+    const size_t ir = (s{{r}} - 1) - {{r}};                     // index into pml parameters
+
+    {%- endif %}
+    const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
+    {{psi ~ u}}[ip] = p0e{{p}}[ir] * {{psi ~ u}}[ip] + p1e{{p}}[ir] * dH{{v ~ r}};
+    {{psi ~ v}}[ip] = p0e{{p}}[ir] * {{psi ~ v}}[ip] + p1e{{p}}[ir] * dH{{u ~ r}};
+    pE{{u}}i {{se}}= {{psi ~ u}}[ip];
+    pE{{v}}i {{sh}}= {{psi ~ v}}[ip];
+}
+{%- endfor %}
+
+/*
+ *  Update E
+ */
+Ex[i] += dt / epsx[i] * (dHzy - dHyz + pExi);
+Ey[i] += dt / epsy[i] * (dHxz - dHzx + pEyi);
+Ez[i] += dt / epsz[i] * (dHyx - dHxy + pEzi);
--- a/opencl_fdtd/kernels/update_h.cl
+++ b/opencl_fdtd/kernels/update_h.cl
@ -0,0 +1,125 @@
+/*
+ *  Update H-field, including any PMLs.
+ *   Also precalculate values for poynting vector if necessary.
+ *  
+ *  Template parameters:
+ *   common_header: Rendered contents of common.cl
+ *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
+ *   pml_thickness: Number of cells (integer)
+ *   do_poynting: Whether to precalculate poynting vector components (boolean)  
+ *
+ *  OpenCL args:
+ *   E, H, dt, [p{01}h{np}, Psi_{xyz}{np}_H], [oS]
+ */
+
+{{common_header}}
+
+////////////////////////////////////////////////////////////////////////////
+
+{% if pmls -%}
+const int pml_thickness = {{pml_thickness}};
+{%- endif %}
+
+/*
+ *  Precalculate derivatives
+ */
+ftype dExy = Ex[i + py] - Ex[i];
+ftype dExz = Ex[i + pz] - Ex[i];
+
+ftype dEyx = Ey[i + px] - Ey[i];
+ftype dEyz = Ey[i + pz] - Ey[i];
+
+ftype dEzx = Ez[i + px] - Ez[i];
+ftype dEzy = Ez[i + py] - Ez[i];
+
+{%- if do_poynting %}
+
+
+/*
+ *  Precalculate averaged E
+ */
+ftype aExy = Ex[i + py] + Ex[i];
+ftype aExz = Ex[i + pz] + Ex[i];
+
+ftype aEyx = Ey[i + px] + Ey[i];
+ftype aEyz = Ey[i + pz] + Ey[i];
+
+ftype aEzx = Ez[i + px] + Ez[i];
+ftype aEzy = Ez[i + py] + Ez[i];
+{%- endif %}
+
+
+/*
+ *  PML Update
+ */
+// PML contributions to H
+ftype pHxi = 0;
+ftype pHyi = 0;
+ftype pHzi = 0;
+
+{%- for r, p in pmls -%}
+    {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
+    {%- set psi = 'Psi_' ~ r ~ p ~ '_H' -%}
+    {%- if r != 'y' -%}
+        {%- set se, sh = '-', '+' -%}
+    {%- else -%}
+        {%- set se, sh = '+', '-' -%}
+    {%- endif -%}
+
+    {%- if p == 'n' %}
+
+if ( {{r}} < pml_thickness ) {
+    const size_t ir = {{r}};                          // index into pml parameters
+
+    {%- elif p == 'p' %}
+
+if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
+    const size_t ir = (s{{r}} - 1) - {{r}};                     // index into pml parameters
+
+    {%- endif %}
+    const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
+    {{psi ~ u}}[ip] = p0h{{p}}[ir] * {{psi ~ u}}[ip] + p1h{{p}}[ir] * dE{{v ~ r}};
+    {{psi ~ v}}[ip] = p0h{{p}}[ir] * {{psi ~ v}}[ip] + p1h{{p}}[ir] * dE{{u ~ r}};
+    pH{{u}}i {{sh}}= {{psi ~ u}}[ip];
+    pH{{v}}i {{se}}= {{psi ~ v}}[ip];
+}
+{%- endfor %}
+
+/*
+ *  Update H
+ */
+{% if do_poynting -%}
+// Save old H for averaging
+ftype Hx_old = Hx[i];
+ftype Hy_old = Hy[i];
+ftype Hz_old = Hz[i];
+{%- endif %}
+
+// H update equations
+Hx[i] -= dt * (dEzy - dEyz + pHxi);
+Hy[i] -= dt * (dExz - dEzx + pHyi);
+Hz[i] -= dt * (dEyx - dExy + pHzi);
+
+{% if do_poynting -%}
+// Average H across timesteps
+ftype aHxt = Hx[i] + Hx_old;
+ftype aHyt = Hy[i] + Hy_old;
+ftype aHzt = Hz[i] + Hz_old;
+
+/*
+ *  Calculate unscaled S components at H locations
+ */
+__global ftype *oSxy = oS + 0 * field_size;
+__global ftype *oSyz = oS + 1 * field_size;
+__global ftype *oSzx = oS + 2 * field_size;
+__global ftype *oSxz = oS + 3 * field_size;
+__global ftype *oSyx = oS + 4 * field_size;
+__global ftype *oSzy = oS + 5 * field_size;
+
+oSxy[i] = aEyx * aHzt;
+oSxz[i] = -aEzx * aHyt;
+oSyz[i] = aEzy * aHxt;
+oSyx[i] = -aExy * aHzt;
+oSzx[i] = aExz * aHyt;
+oSzy[i] = -aEyz * aHxt;
+{%- endif -%}
--- a/opencl_fdtd/kernels/update_s.cl
+++ b/opencl_fdtd/kernels/update_s.cl
@ -0,0 +1,36 @@
+/*
+ *  Update E-field, including any PMLs.
+ *  
+ *  Template parameters:
+ *   common_header: Rendered contents of common.cl
+ *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
+ *   pml_thickness: Number of cells (integer)
+ *   
+ *  OpenCL args:
+ *   E, H, dt, S, oS
+ */
+
+{{common_header}}
+
+//////////////////////////////////////////////////////////////////////
+
+
+/*
+ * Calculate S from oS (pre-calculated components)
+ */ 
+__global ftype *Sx = S + XX;
+__global ftype *Sy = S + YY;
+__global ftype *Sz = S + ZZ;
+
+// Use unscaled S components from H locations 
+__global ftype *oSxy = oS + 0 * field_size;
+__global ftype *oSyz = oS + 1 * field_size;
+__global ftype *oSzx = oS + 2 * field_size;
+__global ftype *oSxz = oS + 3 * field_size;
+__global ftype *oSyx = oS + 4 * field_size;
+__global ftype *oSzy = oS + 5 * field_size;
+
+ftype s_factor = dt * 0.125;
+Sx[i] = (oSxy[i] + oSxz[i] + oSxy[i + my] + oSxz[i + mz]) * s_factor;
+Sy[i] = (oSyz[i] + oSyx[i] + oSyz[i + mz] + oSyx[i + mx]) * s_factor;
+Sz[i] = (oSzx[i] + oSzy[i] + oSzx[i + mx] + oSzy[i + my]) * s_factor;
--- a/opencl_fdtd/simulation.py
+++ b/opencl_fdtd/simulation.py
@ -0,0 +1,238 @@
+"""
+Class for constructing and holding the basic FDTD operations and fields
+"""
+
+from typing import List, Dict, Callable
+from collections import OrderedDict
+import numpy
+import jinja2
+import warnings
+
+import pyopencl
+import pyopencl.array
+from pyopencl.elementwise import ElementwiseKernel
+
+from fdfd_tools import vec
+
+
+__author__ = 'Jan Petykiewicz'
+
+
+# Create jinja2 env on module load
+jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
+
+
+class Simulation(object):
+    """
+    Constructs and holds the basic FDTD operations and related fields
+    """
+    E = None    # type: List[pyopencl.array.Array]
+    H = None    # type: List[pyopencl.array.Array]
+    S = None    # type: List[pyopencl.array.Array]
+    eps = None  # type: List[pyopencl.array.Array]
+    dt = None   # type: float
+
+    arg_type = None     # type: numpy.float32 or numpy.float64
+
+    context = None      # type: pyopencl.Context
+    queue = None        # type: pyopencl.CommandQueue
+
+    update_E = None     # type: Callable[[],pyopencl.Event]
+    update_H = None     # type: Callable[[],pyopencl.Event]
+    update_S = None     # type: Callable[[],pyopencl.Event]
+    sources = None      # type: Dict[str, str]
+
+    def __init__(self,
+                 epsilon: List[numpy.ndarray],
+                 dt: float = .99/numpy.sqrt(3),
+                 initial_E: List[numpy.ndarray] = None,
+                 initial_H: List[numpy.ndarray] = None,
+                 context: pyopencl.Context = None,
+                 queue: pyopencl.CommandQueue = None,
+                 float_type: numpy.float32 or numpy.float64 = numpy.float32,
+                 pml_thickness: int = 10,
+                 pmls: List[List[str]] = None,
+                 do_poynting: bool = True):
+        """
+        Initialize the simulation.
+
+        :param epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray
+                spanning the simulation domain. Relative epsilon is used.
+        :param dt: Time step. Default is the Courant factor.
+        :param initial_E: Initial E-field (default is 0 everywhere). Same format as epsilon.
+        :param initial_H: Initial H-field (default is 0 everywhere). Same format as epsilon.
+        :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
+        :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
+        :param float_type: numpy.float32 or numpy.float64. Default numpy.float32.
+        """
+
+        if len(epsilon) != 3:
+            Exception('Epsilon must be a list with length of 3')
+        if not all((e.shape == epsilon[0].shape for e in epsilon[1:])):
+            Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
+
+        if context is None:
+            self.context = pyopencl.create_some_context()
+        else:
+            self.context = context
+
+        if queue is None:
+            self.queue = pyopencl.CommandQueue(self.context)
+        else:
+            self.queue = queue
+
+        if dt > .99/numpy.sqrt(3):
+            warnings.warn('Warning: unstable dt: {}'.format(dt))
+        elif dt <= 0:
+            raise Exception('Invalid dt: {}'.format(dt))
+        else:
+            self.dt = dt
+
+        self.arg_type = float_type
+        self.sources = {}
+        self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(float_type))
+
+        if initial_E is None:
+            self.E = pyopencl.array.zeros_like(self.eps)
+        else:
+            if len(initial_E) != 3:
+                Exception('Initial_E must be a list of length 3')
+            if not all((E.shape == epsilon[0].shape for E in initial_E)):
+                Exception('Initial_E list elements must have same shape as epsilon elements')
+            self.E = pyopencl.array.to_device(self.queue, vec(E).astype(float_type))
+
+        if initial_H is None:
+            self.H = pyopencl.array.zeros_like(self.eps)
+        else:
+            if len(initial_H) != 3:
+                Exception('Initial_H must be a list of length 3')
+            if not all((H.shape == epsilon[0].shape for H in initial_H)):
+                Exception('Initial_H list elements must have same shape as epsilon elements')
+            self.H = pyopencl.array.to_device(self.queue, vec(H).astype(float_type))
+
+        if pmls is None:
+            pmls = [[d, p] for d in 'xyz' for p in 'np']
+
+        ctype = type_to_C(self.arg_type)
+
+        def ptr(arg: str) -> str:
+            return ctype + ' *' + arg
+
+        base_fields = OrderedDict()
+        base_fields[ptr('E')] = self.E
+        base_fields[ptr('H')] = self.H
+        base_fields[ctype + ' dt'] = self.dt
+
+        eps_field = OrderedDict()
+        eps_field[ptr('eps')] = self.eps
+
+        common_source = jinja_env.get_template('common.cl').render(
+                ftype=ctype,
+                shape=epsilon[0].shape,
+                )
+        jinja_args = {
+                'common_header': common_source,
+                'pml_thickness': pml_thickness,
+                'pmls': pmls,
+                'do_poynting': do_poynting,
+                }
+        E_source = jinja_env.get_template('update_e.cl').render(**jinja_args)
+        H_source = jinja_env.get_template('update_h.cl').render(**jinja_args)
+
+        self.sources['E'] = E_source
+        self.sources['H'] = H_source
+
+        if do_poynting:
+            S_source = jinja_env.get_template('update_s.cl').render(**jinja_args)
+            self.sources['S'] = S_source
+
+            self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=float_type)
+            self.S = pyopencl.array.zeros_like(self.E)
+            S_fields = OrderedDict()
+            S_fields[ptr('oS')] = self.oS
+            S_fields[ptr('S')] = self.S
+        else:
+            S_fields = OrderedDict()
+
+        '''
+        PML
+        '''
+        m = (3.5, 1)
+        sigma_max = 0.8 * (m[0] + 1) / numpy.sqrt(1.0) # TODO: epsilon_eff (not 1.0)
+        alpha_max = 0  # TODO: Decide what to do about non-zero alpha
+
+        def par(x):
+            sigma = ((x / pml_thickness) ** m[0]) * sigma_max
+            alpha = ((1 - x / pml_thickness) ** m[1]) * alpha_max
+            p0 = numpy.exp(-(sigma + alpha) * dt)
+            p1 = sigma / (sigma + alpha) * (p0 - 1)
+            return p0, p1
+
+        xen, xep, xhn, xhp = (numpy.arange(1, pml_thickness + 1, dtype=float_type)[::-1] for _ in range(4))
+        xep -= 0.5
+        xhn -= 0.5
+
+        pml_p_names = [['p' + a + eh + np for np in 'np' for a in '01'] for eh in 'eh']
+        pml_e_fields = OrderedDict()
+        pml_h_fields = OrderedDict()
+        for ne, nh, pe, ph in zip(*pml_p_names, par(xen) + par(xep), par(xhn) + par(xhp)):
+            pml_e_fields[ptr(ne)] = pyopencl.array.to_device(self.queue, pe)
+            pml_h_fields[ptr(nh)] = pyopencl.array.to_device(self.queue, ph)
+
+        for pml in pmls:
+            uv = 'xyz'.replace(pml[0], '')
+            psi_base = 'Psi_' + ''.join(pml) + '_'
+            psi_names = [[psi_base + eh + c for c in uv] for eh in 'EH']
+
+            psi_shape = list(epsilon[0].shape)
+            psi_shape['xyz'.find(pml[0])] = pml_thickness
+
+            for ne, nh in zip(*psi_names):
+                pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
+                pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
+
+        self.pml_e_fields = pml_e_fields
+        self.pml_h_fields = pml_h_fields
+
+
+        '''
+        Create operations
+        '''
+        E_args = OrderedDict()
+        [E_args.update(d) for d in (base_fields, eps_field, pml_e_fields)]
+        E_update = ElementwiseKernel(self.context, operation=E_source,
+                                     arguments=', '.join(E_args.keys()))
+
+        H_args = OrderedDict()
+        [H_args.update(d) for d in (base_fields, pml_h_fields, S_fields)]
+        H_update = ElementwiseKernel(self.context, operation=H_source,
+                                     arguments=', '.join(H_args.keys()))
+        self.update_E = lambda e: E_update(*E_args.values(), wait_for=e)
+        self.update_H = lambda e: H_update(*H_args.values(), wait_for=e)
+
+        if do_poynting:
+            S_args = OrderedDict()
+            [S_args.update(d) for d in (base_fields, S_fields)]
+            S_update = ElementwiseKernel(self.context, operation=S_source,
+                                         arguments=', '.join(S_args.keys()))
+
+            self.update_S = lambda e: S_update(*S_args.values(), wait_for=e)
+
+
+def type_to_C(float_type) -> str:
+    """
+    Returns a string corresponding to the C equivalent of a numpy type.
+    Only works for float16, float32, float64.
+
+    :param float_type: e.g. numpy.float32
+    :return: string containing the corresponding C type (eg. 'double')
+    """
+    if float_type == numpy.float16:
+        arg_type = 'half'
+    elif float_type == numpy.float32:
+        arg_type = 'float'
+    elif float_type == numpy.float64:
+        arg_type = 'double'
+    else:
+        raise Exception('Unsupported type')
+    return arg_type
				`@ -0,0 +1 @@`
				`from .simulation import Simulation, type_to_C`