From d34c478f1d7122028f9540737de47cbc08b2d3cc Mon Sep 17 00:00:00 2001 From: jan Date: Thu, 1 Sep 2016 14:39:44 -0700 Subject: [PATCH] Rewrite, with the following features: - Move to jinja2 templates for the opencl code - Combine PML code into the E, H updates for speed - Add Poynting vector calculation code, including precalculation during H update - Use arrays for PML parameters (p0, p1) - Switch to linearized, C-ordered fields (~50% performance boost??) - Added jinja2 and fdfd_tools dependencies --- fdtd.py | 68 ++++++----- fdtd/base.py | 72 ----------- fdtd/boundary.py | 185 ---------------------------- fdtd/kernels/common.cl | 84 +++++++++++++ fdtd/kernels/update_e.cl | 78 ++++++++++++ fdtd/kernels/update_h.cl | 125 +++++++++++++++++++ fdtd/kernels/update_s.cl | 36 ++++++ fdtd/simulation.py | 258 ++++++++++++++++++++++----------------- requirements.txt | 2 + 9 files changed, 506 insertions(+), 402 deletions(-) delete mode 100644 fdtd/base.py delete mode 100644 fdtd/boundary.py create mode 100644 fdtd/kernels/common.cl create mode 100644 fdtd/kernels/update_e.cl create mode 100644 fdtd/kernels/update_h.cl create mode 100644 fdtd/kernels/update_s.cl diff --git a/fdtd.py b/fdtd.py index 0ce560e..ecd16b0 100644 --- a/fdtd.py +++ b/fdtd.py @@ -8,12 +8,17 @@ import sys import time import numpy -import h5py +import lzma +import dill from fdtd.simulation import Simulation from masque import Pattern, shapes import gridlock import pcgen +import fdfd_tools + + +__author__ = 'Jan Petykiewicz' def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern: @@ -75,7 +80,7 @@ def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern: def main(): max_t = 8000 # number of timesteps - dx = 40 # discretization (nm/cell) + dx = 25 # discretization (nm/cell) pml_thickness = 8 # (number of cells) wl = 1550 # Excitation wavelength and fwhm @@ -114,19 +119,9 @@ def main(): eps=n_air**2, polygons=mask.as_polygons()) - print(grid.shape) + print('grid shape: {}'.format(grid.shape)) # #### Create the simulation grid #### - sim = Simulation(grid.grids) - - # Conducting boundaries and pmls in every direction - c_args = [] - pml_args = [] - for d in (0, 1, 2): - for p in (-1, 1): - c_args += [{'direction': d, 'polarity': p}] - pml_args += [{'direction': d, 'polarity': p, 'epsilon_eff': n_slab**2}] - sim.init_conductors(c_args) - sim.init_cpml(pml_args) + sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8) # Source parameters and function w = 2 * numpy.pi * dx / wl @@ -137,31 +132,44 @@ def main(): def field_source(i): t0 = i * sim.dt - delay return numpy.sin(w * t0) * numpy.exp(-alpha * t0**2) + + with open('sources.c', 'w') as f: + f.write(sim.sources['E']) + f.write('\n==========================================\n') + f.write(sim.sources['H']) + if sim.update_S: + f.write('\n==========================================\n') + f.write(sim.sources['S']) # #### Run a bunch of iterations #### # event = sim.whatever([prev_event]) indicates that sim.whatever should be queued immediately and run # once prev_event is finished. - output_file = h5py.File('simulation_output.h5', 'w') start = time.perf_counter() for t in range(max_t): - event = sim.cpml_E([]) - sim.update_E([event]).wait() + sim.update_E([]).wait() - sim.E[1][tuple(grid.shape//2)] += field_source(t) - event = sim.conductor_E([]) - event = sim.cpml_H([event]) - event = sim.update_H([event]) - sim.conductor_H([event]).wait() + ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + numpy.prod(grid.shape) + sim.E[ind] += field_source(t) + e = sim.update_H([]) + if sim.update_S: + e = sim.update_S([e]) + e.wait() - print('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start))) - sys.stdout.flush() + if t % 100 == 0: + print('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start))) + sys.stdout.flush() - # Save field slices - if (t % 20 == 0 and (max_t - t < 1000 or t < 1000)) or t == max_t-1: - print('saving E-field') - for j, f in enumerate(sim.E): - a = f.get() - output_file['/E{}_t{}'.format('xyz'[j], t)] = a[:, :, round(a.shape[2]/2)] + with lzma.open('saved_simulation', 'wb') as f: + def unvec(f): + return fdfd_tools.unvec(f, grid.shape) + d = { + 'grid': grid, + 'E': unvec(sim.E.get()), + 'H': unvec(sim.H.get()), + } + if sim.S is not None: + d['S'] = unvec(sim.S.get()) + dill.dump(d, f) if __name__ == '__main__': main() diff --git a/fdtd/base.py b/fdtd/base.py deleted file mode 100644 index 6285fb9..0000000 --- a/fdtd/base.py +++ /dev/null @@ -1,72 +0,0 @@ -""" -Basic code snippets for opencl FDTD -""" - -from typing import List -import numpy - - -def shape_source(shape: List[int] or numpy.ndarray) -> str: - """ - Defines sx, sy, sz C constants specifying the shape of the grid in each of the 3 dimensions. - - :param shape: [sx, sy, sz] values. - :return: String containing C source. - """ - sxyz = """ -// Field sizes -const int sx = {shape[0]}; -const int sy = {shape[1]}; -const int sz = {shape[2]}; -""".format(shape=shape) - return sxyz - -# Defines dix, diy, diz constants used for stepping in the x, y, z directions in a linear array -# (ie, given Ex[i] referring to position (x, y, z), Ex[i+diy] will refer to position (x, y+1, z)) -dixyz_source = """ -// Convert offset in field xyz to linear index offset -const int dix = sz * sy; -const int diy = sz; -const int diz = 1; -""" - -# Given a linear index i and shape sx, sy, sz, defines x, y, and z as the 3D indices of the current element (i). -xyz_source = """ -// Convert linear index to field index (xyz) -const int x = i / (sz * sy); -const int y = (i - x * sz * sy) / sz; -const int z = (i - y * sz - x * sz * sy); -""" - -# Source code for updating the E field; maxes use of dixyz_source. -maxwell_E_source = """ -// E update equations -Ex[i] += dt / epsx[i] * ((Hz[i] - Hz[i-diy]) - (Hy[i] - Hy[i-diz])); -Ey[i] += dt / epsy[i] * ((Hx[i] - Hx[i-diz]) - (Hz[i] - Hz[i-dix])); -Ez[i] += dt / epsz[i] * ((Hy[i] - Hy[i-dix]) - (Hx[i] - Hx[i-diy])); -""" - -# Source code for updating the H field; maxes use of dixyz_source and assumes mu=0 -maxwell_H_source = """ -// H update equations -Hx[i] -= dt * ((Ez[i+diy] - Ez[i]) - (Ey[i+diz] - Ey[i])); -Hy[i] -= dt * ((Ex[i+diz] - Ex[i]) - (Ez[i+dix] - Ez[i])); -Hz[i] -= dt * ((Ey[i+dix] - Ey[i]) - (Ex[i+diy] - Ex[i])); -""" - - -def type_to_C(float_type: numpy.float32 or numpy.float64) -> str: - """ - Returns a string corresponding to the C equivalent of a numpy type. - Only works for float32 and float64. - - :param float_type: numpy.float32 or numpy.float64 - :return: string containing the corresponding C type (eg. 'double') - """ - if float_type == numpy.float32: - arg_type = 'float' - elif float_type == numpy.float64: - arg_type = 'double' - else: - raise Exception('Unsupported type') - return arg_type diff --git a/fdtd/boundary.py b/fdtd/boundary.py deleted file mode 100644 index bb8dbdf..0000000 --- a/fdtd/boundary.py +++ /dev/null @@ -1,185 +0,0 @@ -from typing import List, Dict -import numpy - - -def conductor(direction: int, - polarity: int, - ) -> List[str]: - """ - Create source code for conducting boundary conditions. - - :param direction: integer in range(3), corresponding to x,y,z. - :param polarity: -1 or 1, specifying eg. a -x or +x boundary. - :return: [E_source, H_source] source code for E and H boundary update steps. - """ - if direction not in range(3): - raise Exception('Invalid direction: {}'.format(direction)) - - if polarity not in (-1, 1): - raise Exception('Invalid polarity: {}'.format(polarity)) - - r = 'xyz'[direction] - uv = 'xyz'.replace(r, '') - - if polarity < 0: - bc_e = """ -if ({r} == 0) {{ - E{r}[i] = 0; - E{u}[i] = E{u}[i+di{r}]; - E{v}[i] = E{v}[i+di{r}]; -}} -""" - bc_h = """ -if ({r} == 0) {{ - H{r}[i] = H{r}[i+di{r}]; - H{u}[i] = 0; - H{v}[i] = 0; -}} -""" - - elif polarity > 0: - bc_e = """ -if ({r} == s{r} - 1) {{ - E{r}[i] = -E{r}[i-2*di{r}]; - E{u}[i] = +E{u}[i-di{r}]; - E{v}[i] = +E{v}[i-di{r}]; -}} else if ({r} == s{r} - 2) {{ - E{r}[i] = 0; -}} -""" - bc_h = """ -if ({r} == s{r} - 1) {{ - H{r}[i] = +H{r}[i-di{r}]; - H{u}[i] = -H{u}[i-2*di{r}]; - H{v}[i] = -H{v}[i-2*di{r}]; -}} else if ({r} == s{r} - 2) {{ - H{u}[i] = 0; - H{v}[i] = 0; -}} -""" - else: - raise Exception() - - replacements = {'r': r, 'u': uv[0], 'v': uv[1]} - return [s.format(**replacements) for s in (bc_e, bc_h)] - - -def cpml(direction: int, - polarity: int, - dt: float, - thickness: int=8, - epsilon_eff: float=1, - ) -> Dict: - """ - Generate source code for complex phase matched layer (cpml) absorbing boundaries. - These are not full boundary conditions and require a conducting boundary to be added - in the same direction as the pml. - - :param direction: integer in range(3), corresponding to x, y, z directions. - :param polarity: -1 or 1, corresponding to eg. -x or +x direction. - :param dt: timestep used by the simulation - :param thickness: Number of cells used by the pml (the grid is NOT expanded to add these cells). Default 8. - :param epsilon_eff: Effective epsilon_r of the pml layer. Default 1. - :return: Dict with entries 'E', 'H' (update equations for E and H) and 'psi_E', 'psi_H' (lists of str, - specifying the field names of the cpml fields used in the E and H update steps. Eg., - Psi_xn_Ex for the complex Ex component for the x- pml.) - """ - if direction not in range(3): - raise Exception('Invalid direction: {}'.format(direction)) - - if polarity not in (-1, 1): - raise Exception('Invalid polarity: {}'.format(polarity)) - - if thickness <= 2: - raise Exception('It would be wise to have a pml with 4+ cells of thickness') - - if epsilon_eff <= 0: - raise Exception('epsilon_eff must be positive') - - m = (3.5, 1) - sigma_max = 0.8 * (m[0] + 1) / numpy.sqrt(epsilon_eff) - alpha_max = 0 # TODO: Decide what to do about non-zero alpha - transverse = numpy.delete(range(3), direction) - - r = 'xyz'[direction] - np = 'nVp'[numpy.sign(polarity)+1] - uv = ['xyz'[i] for i in transverse] - - xe = numpy.arange(1, thickness+1, dtype=float)[::-1] - xh = numpy.arange(1, thickness+1, dtype=float)[::-1] - if polarity > 0: - xe -= 0.5 - elif polarity < 0: - xh -= 0.5 - - def par(x): - sigma = ((x / thickness) ** m[0]) * sigma_max - alpha = ((1 - x / thickness) ** m[1]) * alpha_max - p0 = numpy.exp(-(sigma + alpha) * dt) - p1 = sigma / (sigma + alpha) * (p0 - 1) - return p0, p1 - p0e, p1e = par(xe) - p0h, p1h = par(xh) - - vals = {'r': r, - 'u': uv[0], - 'v': uv[1], - 'np': np, - 'th': thickness, - 'p0e': ', '.join((str(x) for x in p0e)), - 'p1e': ', '.join((str(x) for x in p1e)), - 'p0h': ', '.join((str(x) for x in p0h)), - 'p1h': ', '.join((str(x) for x in p1h)), - 'se': '-+'[direction % 2], - 'sh': '+-'[direction % 2]} - - if polarity < 0: - bounds_if = """ -if ( 0 < {r} && {r} < {th} + 1 ) {{ - const int ir = {r} - 1; // index into pml parameters - const int ip = {v} + {u} * s{v} + ir * s{v} * s{u}; // linear index into Psi -""" - elif polarity > 0: - bounds_if = """ -if ( (s{r} - 1) > {r} && {r} > (s{r} - 1) - ({th} + 1) ) {{ - const int ir = (s{r} - 1) - ({r} + 1); // index into pml parameters - const int ip = {v} + {u} * s{v} + ir * s{v} * s{u}; // linear index into Psi -""" - else: - raise Exception('Bad polarity (=0)') - - code_e = """ - // pml parameters: - const float p0[{th}] = {{ {p0e} }}; - const float p1[{th}] = {{ {p1e} }}; - - Psi_{r}{np}_E{u}[ip] = p0[ir] * Psi_{r}{np}_E{u}[ip] + p1[ir] * (H{v}[i] - H{v}[i-di{r}]); - Psi_{r}{np}_E{v}[ip] = p0[ir] * Psi_{r}{np}_E{v}[ip] + p1[ir] * (H{u}[i] - H{u}[i-di{r}]); - - E{u}[i] {se}= dt / eps{u}[i] * Psi_{r}{np}_E{u}[ip]; - E{v}[i] {sh}= dt / eps{v}[i] * Psi_{r}{np}_E{v}[ip]; -}} -""" - code_h = """ - // pml parameters: - const float p0[{th}] = {{ {p0h} }}; - const float p1[{th}] = {{ {p1h} }}; - - Psi_{r}{np}_H{u}[ip] = p0[ir] * Psi_{r}{np}_H{u}[ip] + p1[ir] * (E{v}[i+di{r}] - E{v}[i]); - Psi_{r}{np}_H{v}[ip] = p0[ir] * Psi_{r}{np}_H{v}[ip] + p1[ir] * (E{u}[i+di{r}] - E{u}[i]); - - H{u}[i] {sh}= dt * Psi_{r}{np}_H{u}[ip]; - H{v}[i] {se}= dt * Psi_{r}{np}_H{v}[ip]; -}} -""" - - pml_data = { - 'E': (bounds_if + code_e).format(**vals), - 'H': (bounds_if + code_h).format(**vals), - 'psi_E': ['Psi_{r}{np}_E{u}'.format(**vals), - 'Psi_{r}{np}_E{v}'.format(**vals)], - 'psi_H': ['Psi_{r}{np}_H{u}'.format(**vals), - 'Psi_{r}{np}_H{v}'.format(**vals)], - } - - return pml_data diff --git a/fdtd/kernels/common.cl b/fdtd/kernels/common.cl new file mode 100644 index 0000000..d203ca7 --- /dev/null +++ b/fdtd/kernels/common.cl @@ -0,0 +1,84 @@ +{# +/* Common code for E, H updates + * + * Template parameters: + * ftype type name (e.g. float or double) + * shape list of 3 ints specifying shape of fields + */ +#} + +typedef {{ftype}} ftype; + + +/* + * Field size info + */ +const size_t sx = {{shape[0]}}; +const size_t sy = {{shape[1]}}; +const size_t sz = {{shape[2]}}; +const size_t field_size = sx * sy * sz; + +//Since we use i to index into Ex[], Ey[], ... rather than E[], do nothing if +// i is outside the bounds of Ex[]. +if (i >= field_size) { + PYOPENCL_ELWISE_CONTINUE; +} + + +/* + * Array indexing + */ +// Given a linear index i and shape (sx, sy, sz), defines x, y, and z +// as the 3D indices of the current element (i). +// (ie, converts linear index [i] to field indices (x, y, z) +const size_t x = i / (sz * sy); +const size_t y = (i - x * sz * sy) / sz; +const size_t z = (i - y * sz - x * sz * sy); + +// Calculate linear index offsets corresponding to offsets in 3D +// (ie, if E[i] <-> E(x, y, z), then E[i + diy] <-> E(x, y + 1, z) +const size_t dix = sz * sy; +const size_t diy = sz; +const size_t diz = 1; + + +/* + * Pointer math + */ +//Pointer offsets into the components of a linearized vector-field +// (eg. Hx = H + XX, where H and Hx are pointers) +const size_t XX = 0; +const size_t YY = field_size; +const size_t ZZ = field_size * 2; + +//Define pointers to vector components of each field (eg. Hx = H + XX) +__global ftype *Ex = E + XX; +__global ftype *Ey = E + YY; +__global ftype *Ez = E + ZZ; + +__global ftype *Hx = H + XX; +__global ftype *Hy = H + YY; +__global ftype *Hz = H + ZZ; + + +/* + * Implement periodic boundary conditions + * + * mx ([m]inus [x]) gives the index offset of the adjacent cell in the minus-x direction. + * In the event that we start at x == 0, we actually want to wrap around and grab the cell + * x_{-1} == (sx - 1) instead, ie. mx = (sx - 1) * dix . + * + * px ([p]lus [x]) gives the index offset of the adjacent cell in the plus-x direction. + * In the event that we start at x == (sx - 1), we actually want to wrap around and grab + * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix . + */ +{% for r in 'xyz' %} +int m{{r}} = -di{{r}}; +int p{{r}} = +di{{r}}; +int wrap_{{r}} = (s{{r}} - 1) * di{{r}}; +if ( {{r}} == 0 ) { + m{{r}} = wrap_{{r}}; +} else if ( {{r}} == s{{r}} - 1 ) { + p{{r}} = -wrap_{{r}}; +} +{% endfor %} diff --git a/fdtd/kernels/update_e.cl b/fdtd/kernels/update_e.cl new file mode 100644 index 0000000..2728b46 --- /dev/null +++ b/fdtd/kernels/update_e.cl @@ -0,0 +1,78 @@ +/* + * Update E-field, including any PMLs. + * + * Template parameters: + * common_header: Rendered contents of common.cl + * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities + * pml_thickness: Number of cells (integer) + * + * OpenCL args: + * E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E] + */ + +{{common_header}} + +//////////////////////////////////////////////////////////////////////////// + +__global ftype *epsx = eps + XX; +__global ftype *epsy = eps + YY; +__global ftype *epsz = eps + ZZ; + +{% if pmls -%} +const int pml_thickness = {{pml_thickness}}; +{%- endif %} + +/* + * Precalclate derivatives + */ +ftype dHxy = Hx[i] - Hx[i + my]; +ftype dHxz = Hx[i] - Hx[i + mz]; + +ftype dHyx = Hy[i] - Hy[i + mx]; +ftype dHyz = Hy[i] - Hy[i + mz]; + +ftype dHzx = Hz[i] - Hz[i + mx]; +ftype dHzy = Hz[i] - Hz[i + my]; + +/* + * PML Update + */ +// PML effects on E +ftype pExi = 0; +ftype pEyi = 0; +ftype pEzi = 0; + +{% for r, p in pmls -%} + {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} + {%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%} + {%- if r != 'y' -%} + {%- set se, sh = '-', '+' -%} + {%- else -%} + {%- set se, sh = '+', '-' -%} + {%- endif -%} + + {%- if p == 'n' %} + +if ( {{r}} < pml_thickness ) { + const size_t ir = {{r}}; // index into pml parameters + + {%- elif p == 'p' %} + +if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) { + const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters + + {%- endif %} + const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi + {{psi ~ u}}[ip] = p0e{{p}}[ir] * {{psi ~ u}}[ip] + p1e{{p}}[ir] * dH{{v ~ r}}; + {{psi ~ v}}[ip] = p0e{{p}}[ir] * {{psi ~ v}}[ip] + p1e{{p}}[ir] * dH{{u ~ r}}; + pE{{u}}i {{se}}= {{psi ~ u}}[ip]; + pE{{v}}i {{sh}}= {{psi ~ v}}[ip]; +} +{%- endfor %} + +/* + * Update E + */ +Ex[i] += dt / epsx[i] * (dHzy - dHyz + pExi); +Ey[i] += dt / epsy[i] * (dHxz - dHzx + pEyi); +Ez[i] += dt / epsz[i] * (dHyx - dHxy + pEzi); diff --git a/fdtd/kernels/update_h.cl b/fdtd/kernels/update_h.cl new file mode 100644 index 0000000..e093cb9 --- /dev/null +++ b/fdtd/kernels/update_h.cl @@ -0,0 +1,125 @@ +/* + * Update H-field, including any PMLs. + * Also precalculate values for poynting vector if necessary. + * + * Template parameters: + * common_header: Rendered contents of common.cl + * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities + * pml_thickness: Number of cells (integer) + * do_poynting: Whether to precalculate poynting vector components (boolean) + * + * OpenCL args: + * E, H, dt, [p{01}h{np}, Psi_{xyz}{np}_H], [oS] + */ + +{{common_header}} + +//////////////////////////////////////////////////////////////////////////// + +{% if pmls -%} +const int pml_thickness = {{pml_thickness}}; +{%- endif %} + +/* + * Precalculate derivatives + */ +ftype dExy = Ex[i + py] - Ex[i]; +ftype dExz = Ex[i + pz] - Ex[i]; + +ftype dEyx = Ey[i + px] - Ey[i]; +ftype dEyz = Ey[i + pz] - Ey[i]; + +ftype dEzx = Ez[i + px] - Ez[i]; +ftype dEzy = Ez[i + py] - Ez[i]; + +{%- if do_poynting %} + + +/* + * Precalculate averaged E + */ +ftype aExy = Ex[i + py] + Ex[i]; +ftype aExz = Ex[i + pz] + Ex[i]; + +ftype aEyx = Ey[i + px] + Ey[i]; +ftype aEyz = Ey[i + pz] + Ey[i]; + +ftype aEzx = Ez[i + px] + Ez[i]; +ftype aEzy = Ez[i + py] + Ez[i]; +{%- endif %} + + +/* + * PML Update + */ +// PML contributions to H +ftype pHxi = 0; +ftype pHyi = 0; +ftype pHzi = 0; + +{%- for r, p in pmls -%} + {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} + {%- set psi = 'Psi_' ~ r ~ p ~ '_H' -%} + {%- if r != 'y' -%} + {%- set se, sh = '-', '+' -%} + {%- else -%} + {%- set se, sh = '+', '-' -%} + {%- endif -%} + + {%- if p == 'n' %} + +if ( {{r}} < pml_thickness ) { + const size_t ir = {{r}}; // index into pml parameters + + {%- elif p == 'p' %} + +if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) { + const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters + + {%- endif %} + const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi + {{psi ~ u}}[ip] = p0h{{p}}[ir] * {{psi ~ u}}[ip] + p1h{{p}}[ir] * dE{{v ~ r}}; + {{psi ~ v}}[ip] = p0h{{p}}[ir] * {{psi ~ v}}[ip] + p1h{{p}}[ir] * dE{{u ~ r}}; + pH{{u}}i {{sh}}= {{psi ~ u}}[ip]; + pH{{v}}i {{se}}= {{psi ~ v}}[ip]; +} +{%- endfor %} + +/* + * Update H + */ +{% if do_poynting -%} +// Save old H for averaging +ftype Hx_old = Hx[i]; +ftype Hy_old = Hy[i]; +ftype Hz_old = Hz[i]; +{%- endif %} + +// H update equations +Hx[i] -= dt * (dEzy - dEyz + pHxi); +Hy[i] -= dt * (dExz - dEzx + pHyi); +Hz[i] -= dt * (dEyx - dExy + pHzi); + +{% if do_poynting -%} +// Average H across timesteps +ftype aHxt = Hx[i] + Hx_old; +ftype aHyt = Hy[i] + Hy_old; +ftype aHzt = Hz[i] + Hz_old; + +/* + * Calculate unscaled S components at H locations + */ +__global ftype *oSxy = oS + 0 * field_size; +__global ftype *oSyz = oS + 1 * field_size; +__global ftype *oSzx = oS + 2 * field_size; +__global ftype *oSxz = oS + 3 * field_size; +__global ftype *oSyx = oS + 4 * field_size; +__global ftype *oSzy = oS + 5 * field_size; + +oSxy[i] = aEyx * aHzt; +oSxz[i] = -aEzx * aHyt; +oSyz[i] = aEzy * aHxt; +oSyx[i] = -aExy * aHzt; +oSzx[i] = aExz * aHyt; +oSzy[i] = -aEyz * aHxt; +{%- endif -%} diff --git a/fdtd/kernels/update_s.cl b/fdtd/kernels/update_s.cl new file mode 100644 index 0000000..0310411 --- /dev/null +++ b/fdtd/kernels/update_s.cl @@ -0,0 +1,36 @@ +/* + * Update E-field, including any PMLs. + * + * Template parameters: + * common_header: Rendered contents of common.cl + * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities + * pml_thickness: Number of cells (integer) + * + * OpenCL args: + * E, H, dt, S, oS + */ + +{{common_header}} + +////////////////////////////////////////////////////////////////////// + + +/* + * Calculate S from oS (pre-calculated components) + */ +__global ftype *Sx = S + XX; +__global ftype *Sy = S + YY; +__global ftype *Sz = S + ZZ; + +// Use unscaled S components from H locations +__global ftype *oSxy = oS + 0 * field_size; +__global ftype *oSyz = oS + 1 * field_size; +__global ftype *oSzx = oS + 2 * field_size; +__global ftype *oSxz = oS + 3 * field_size; +__global ftype *oSyx = oS + 4 * field_size; +__global ftype *oSzy = oS + 5 * field_size; + +ftype s_factor = dt * 0.125; +Sx[i] = (oSxy[i] + oSxz[i] + oSxy[i + my] + oSxz[i + mz]) * s_factor; +Sy[i] = (oSyz[i] + oSyx[i] + oSyz[i + mz] + oSyx[i + mx]) * s_factor; +Sz[i] = (oSzx[i] + oSzy[i] + oSzx[i + mx] + oSzy[i + my]) * s_factor; diff --git a/fdtd/simulation.py b/fdtd/simulation.py index 227dcb0..3dd9f62 100644 --- a/fdtd/simulation.py +++ b/fdtd/simulation.py @@ -3,15 +3,23 @@ Class for constructing and holding the basic FDTD operations and fields """ from typing import List, Dict, Callable +from collections import OrderedDict import numpy +import jinja2 import warnings import pyopencl import pyopencl.array from pyopencl.elementwise import ElementwiseKernel -from . import boundary, base -from .base import type_to_C +from fdfd_tools import vec + + +__author__ = 'Jan Petykiewicz' + + +# Create jinja2 env on module load +jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels')) class Simulation(object): @@ -20,6 +28,7 @@ class Simulation(object): """ E = None # type: List[pyopencl.array.Array] H = None # type: List[pyopencl.array.Array] + S = None # type: List[pyopencl.array.Array] eps = None # type: List[pyopencl.array.Array] dt = None # type: float @@ -30,24 +39,20 @@ class Simulation(object): update_E = None # type: Callable[[],pyopencl.Event] update_H = None # type: Callable[[],pyopencl.Event] - - conductor_E = None # type: Callable[[],pyopencl.Event] - conductor_H = None # type: Callable[[],pyopencl.Event] - - cpml_E = None # type: Callable[[],pyopencl.Event] - cpml_H = None # type: Callable[[],pyopencl.Event] - - cpml_psi_E = None # type: Dict[str, pyopencl.array.Array] - cpml_psi_H = None # type: Dict[str, pyopencl.array.Array] + update_S = None # type: Callable[[],pyopencl.Event] + sources = None # type: Dict[str, str] def __init__(self, epsilon: List[numpy.ndarray], - dt: float=.99/numpy.sqrt(3), - initial_E: List[numpy.ndarray]=None, - initial_H: List[numpy.ndarray]=None, - context: pyopencl.Context=None, - queue: pyopencl.CommandQueue=None, - float_type: numpy.float32 or numpy.float64=numpy.float32): + dt: float = .99/numpy.sqrt(3), + initial_E: List[numpy.ndarray] = None, + initial_H: List[numpy.ndarray] = None, + context: pyopencl.Context = None, + queue: pyopencl.CommandQueue = None, + float_type: numpy.float32 or numpy.float64 = numpy.float32, + pml_thickness: int = 10, + pmls: List[List[str]] = None, + do_poynting: bool = True): """ Initialize the simulation. @@ -67,7 +72,7 @@ class Simulation(object): Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon]) if context is None: - self.context = pyopencl.create_some_context(False) + self.context = pyopencl.create_some_context() else: self.context = context @@ -84,126 +89,149 @@ class Simulation(object): self.dt = dt self.arg_type = float_type - - self.eps = [pyopencl.array.to_device(self.queue, e.astype(float_type)) for e in epsilon] + self.sources = {} + self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(float_type)) if initial_E is None: - self.E = [pyopencl.array.zeros_like(self.eps[0]) for _ in range(3)] + self.E = pyopencl.array.zeros_like(self.eps) else: if len(initial_E) != 3: Exception('Initial_E must be a list of length 3') if not all((E.shape == epsilon[0].shape for E in initial_E)): Exception('Initial_E list elements must have same shape as epsilon elements') - self.E = [pyopencl.array.to_device(self.queue, E.astype(float_type)) for E in initial_E] + self.E = pyopencl.array.to_device(self.queue, vec(E).astype(float_type)) if initial_H is None: - self.H = [pyopencl.array.zeros_like(self.eps[0]) for _ in range(3)] + self.H = pyopencl.array.zeros_like(self.eps) else: if len(initial_H) != 3: Exception('Initial_H must be a list of length 3') if not all((H.shape == epsilon[0].shape for H in initial_H)): Exception('Initial_H list elements must have same shape as epsilon elements') - self.H = [pyopencl.array.to_device(self.queue, H.astype(float_type)) for H in initial_H] + self.H = pyopencl.array.to_device(self.queue, vec(H).astype(float_type)) + + if pmls is None: + pmls = [[d, p] for d in 'xyz' for p in 'np'] ctype = type_to_C(self.arg_type) - E_args = [ctype + ' *E' + c for c in 'xyz'] - H_args = [ctype + ' *H' + c for c in 'xyz'] - eps_args = [ctype + ' *eps' + c for c in 'xyz'] - dt_arg = [ctype + ' dt'] + def ptr(arg: str) -> str: + return ctype + ' *' + arg - sxyz = base.shape_source(epsilon[0].shape) - E_source = sxyz + base.dixyz_source + base.maxwell_E_source - H_source = sxyz + base.dixyz_source + base.maxwell_H_source + base_fields = OrderedDict() + base_fields[ptr('E')] = self.E + base_fields[ptr('H')] = self.H + base_fields[ctype + ' dt'] = self.dt + eps_field = OrderedDict() + eps_field[ptr('eps')] = self.eps + + common_source = jinja_env.get_template('common.cl').render( + ftype=ctype, + shape=epsilon[0].shape, + ) + jinja_args = { + 'common_header': common_source, + 'pml_thickness': pml_thickness, + 'pmls': pmls, + 'do_poynting': do_poynting, + } + E_source = jinja_env.get_template('update_e.cl').render(**jinja_args) + H_source = jinja_env.get_template('update_h.cl').render(**jinja_args) + + self.sources['E'] = E_source + self.sources['H'] = H_source + + if do_poynting: + S_source = jinja_env.get_template('update_s.cl').render(**jinja_args) + self.sources['S'] = S_source + + self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=float_type) + self.S = pyopencl.array.zeros_like(self.E) + S_fields = OrderedDict() + S_fields[ptr('oS')] = self.oS + S_fields[ptr('S')] = self.S + else: + S_fields = OrderedDict() + + ''' + PML + ''' + m = (3.5, 1) + sigma_max = 0.8 * (m[0] + 1) / numpy.sqrt(1.0) # TODO: epsilon_eff (not 1.0) + alpha_max = 0 # TODO: Decide what to do about non-zero alpha + + def par(x): + sigma = ((x / pml_thickness) ** m[0]) * sigma_max + alpha = ((1 - x / pml_thickness) ** m[1]) * alpha_max + p0 = numpy.exp(-(sigma + alpha) * dt) + p1 = sigma / (sigma + alpha) * (p0 - 1) + return p0, p1 + + xen, xep, xhn, xhp = (numpy.arange(1, pml_thickness + 1, dtype=float_type)[::-1] for _ in range(4)) + xep -= 0.5 + xhn -= 0.5 + + pml_p_names = [['p' + a + eh + np for np in 'np' for a in '01'] for eh in 'eh'] + pml_e_fields = OrderedDict() + pml_h_fields = OrderedDict() + for ne, nh, pe, ph in zip(*pml_p_names, par(xen) + par(xep), par(xhn) + par(xhp)): + pml_e_fields[ptr(ne)] = pyopencl.array.to_device(self.queue, pe) + pml_h_fields[ptr(nh)] = pyopencl.array.to_device(self.queue, ph) + + for pml in pmls: + uv = 'xyz'.replace(pml[0], '') + psi_base = 'Psi_' + ''.join(pml) + '_' + psi_names = [[psi_base + eh + c for c in uv] for eh in 'EH'] + + psi_shape = list(epsilon[0].shape) + psi_shape['xyz'.find(pml[0])] = pml_thickness + + for ne, nh in zip(*psi_names): + pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type) + pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type) + + self.pml_e_fields = pml_e_fields + self.pml_h_fields = pml_h_fields + + + ''' + Create operations + ''' + E_args = OrderedDict() + [E_args.update(d) for d in (base_fields, eps_field, pml_e_fields)] E_update = ElementwiseKernel(self.context, operation=E_source, - arguments=', '.join(E_args + H_args + dt_arg + eps_args)) + arguments=', '.join(E_args.keys())) + H_args = OrderedDict() + [H_args.update(d) for d in (base_fields, pml_h_fields, S_fields)] H_update = ElementwiseKernel(self.context, operation=H_source, - arguments=', '.join(E_args + H_args + dt_arg)) + arguments=', '.join(H_args.keys())) + self.update_E = lambda e: E_update(*E_args.values(), wait_for=e) + self.update_H = lambda e: H_update(*H_args.values(), wait_for=e) - self.update_E = lambda e: E_update(*self.E, *self.H, self.dt, *self.eps, wait_for=e) - self.update_H = lambda e: H_update(*self.E, *self.H, self.dt, wait_for=e) + if do_poynting: + S_args = OrderedDict() + [S_args.update(d) for d in (base_fields, S_fields)] + S_update = ElementwiseKernel(self.context, operation=S_source, + arguments=', '.join(S_args.keys())) - def init_cpml(self, pml_args: List[Dict]): - """ - Initialize absorbing layers (cpml: complex phase matched layer). PMLs are not actual - boundary conditions, so you should add a conducting boundary (.init_conductors()) for - all directions in which you add PMLs. - Allows use of self.cpml_E(events) and self.cpml_H(events). - All necessary additional fields are created on the opencl device. + self.update_S = lambda e: S_update(*S_args.values(), wait_for=e) - :param pml_args: A list containing dictionaries which are passed to .boundary.cpml(...). - The dt argument is set automatically, but the others must be passed in each entry - of pml_args. - """ - sxyz = base.shape_source(self.eps[0].shape) - # Prepare per-iteration constants for later use - pml_E_source = sxyz + base.dixyz_source + base.xyz_source - pml_H_source = sxyz + base.dixyz_source + base.xyz_source +def type_to_C(float_type) -> str: + """ + Returns a string corresponding to the C equivalent of a numpy type. + Only works for float16, float32, float64. - psi_E = [] - psi_H = [] - psi_E_names = [] - psi_H_names = [] - for arg_set in pml_args: - pml_data = boundary.cpml(dt=self.dt, **arg_set) - - pml_E_source += pml_data['E'] - pml_H_source += pml_data['H'] - - ti = numpy.delete(range(3), arg_set['direction']) - trans = [self.eps[0].shape[i] for i in ti] - psi_shape = (8, trans[0], trans[1]) - - psi_E += [pyopencl.array.zeros(self.queue, psi_shape, dtype=self.arg_type) - for _ in pml_data['psi_E']] - psi_H += [pyopencl.array.zeros(self.queue, psi_shape, dtype=self.arg_type) - for _ in pml_data['psi_H']] - - psi_E_names += pml_data['psi_E'] - psi_H_names += pml_data['psi_H'] - - ctype = type_to_C(self.arg_type) - E_args = [ctype + ' *E' + c for c in 'xyz'] - H_args = [ctype + ' *H' + c for c in 'xyz'] - eps_args = [ctype + ' *eps' + c for c in 'xyz'] - dt_arg = [ctype + ' dt'] - arglist_E = [ctype + ' *' + psi for psi in psi_E_names] - arglist_H = [ctype + ' *' + psi for psi in psi_H_names] - pml_E_args = ', '.join(E_args + H_args + dt_arg + eps_args + arglist_E) - pml_H_args = ', '.join(E_args + H_args + dt_arg + arglist_H) - - pml_E = ElementwiseKernel(self.context, arguments=pml_E_args, operation=pml_E_source) - pml_H = ElementwiseKernel(self.context, arguments=pml_H_args, operation=pml_H_source) - - self.cpml_E = lambda e: pml_E(*self.E, *self.H, self.dt, *self.eps, *psi_E, wait_for=e) - self.cpml_H = lambda e: pml_H(*self.E, *self.H, self.dt, *psi_H, wait_for=e) - self.cmpl_psi_E = {k: v for k, v in zip(psi_E_names, psi_E)} - self.cmpl_psi_H = {k: v for k, v in zip(psi_H_names, psi_H)} - - def init_conductors(self, conductor_args: List[Dict]): - """ - Initialize reflecting boundary conditions. - Allows use of self.conductor_E(events) and self.conductor_H(events). - - :param conductor_args: List of dictionaries with which to call .boundary.conductor(...). - """ - - sxyz = base.shape_source(self.eps[0].shape) - - # Prepare per-iteration constants - bc_E_source = sxyz + base.dixyz_source + base.xyz_source - bc_H_source = sxyz + base.dixyz_source + base.xyz_source - for arg_set in conductor_args: - [e, h] = boundary.conductor(**arg_set) - bc_E_source += e - bc_H_source += h - - E_args = [type_to_C(self.arg_type) + ' *E' + c for c in 'xyz'] - H_args = [type_to_C(self.arg_type) + ' *H' + c for c in 'xyz'] - bc_E = ElementwiseKernel(self.context, arguments=E_args, operation=bc_E_source) - bc_H = ElementwiseKernel(self.context, arguments=H_args, operation=bc_H_source) - - self.conductor_E = lambda e: bc_E(*self.E, wait_for=e) - self.conductor_H = lambda e: bc_H(*self.H, wait_for=e) + :param float_type: e.g. numpy.float32 + :return: string containing the corresponding C type (eg. 'double') + """ + if float_type == numpy.float16: + arg_type = 'half' + elif float_type == numpy.float32: + arg_type = 'float' + elif float_type == numpy.float64: + arg_type = 'double' + else: + raise Exception('Unsupported type') + return arg_type diff --git a/requirements.txt b/requirements.txt index 6121670..6ed651b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,8 @@ numpy h5py pyopencl +jinja2 -e git+https://mpxd.net/gogs/jan/float_raster.git@release#egg=float_raster -e git+https://mpxd.net/gogs/jan/gridlock.git@release#egg=gridlock -e git+https://mpxd.net/gogs/jan/masque.git@release#egg=masque +-e git+https://mpxd.net/gogs/jan/fdfd_tools.git@master#egg=fdfd_tools