From 727105f8fe114f62233820203b5c6eadbf1638ef Mon Sep 17 00:00:00 2001 From: jan Date: Thu, 1 Sep 2016 14:39:44 -0700 Subject: [PATCH 01/33] use arrays for pml params --- fdtd.py | 6 +++++- fdtd/boundary.py | 24 ++++++------------------ fdtd/simulation.py | 13 +++++++++---- 3 files changed, 20 insertions(+), 23 deletions(-) diff --git a/fdtd.py b/fdtd.py index 0ce560e..63d2337 100644 --- a/fdtd.py +++ b/fdtd.py @@ -156,8 +156,12 @@ def main(): print('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start))) sys.stdout.flush() + if t % 20 == 0: + r = sum([(f * f * e).get().sum() for f, e in zip(sim.E, sim.eps)]) + print('E sum', r) + # Save field slices - if (t % 20 == 0 and (max_t - t < 1000 or t < 1000)) or t == max_t-1: + if (t % 20 == 0 and (max_t - t <= 1000 or t <= 2000)) or t == max_t-1: print('saving E-field') for j, f in enumerate(sim.E): a = f.get() diff --git a/fdtd/boundary.py b/fdtd/boundary.py index bb8dbdf..6b50f9f 100644 --- a/fdtd/boundary.py +++ b/fdtd/boundary.py @@ -118,18 +118,12 @@ def cpml(direction: int, p0 = numpy.exp(-(sigma + alpha) * dt) p1 = sigma / (sigma + alpha) * (p0 - 1) return p0, p1 - p0e, p1e = par(xe) - p0h, p1h = par(xh) vals = {'r': r, 'u': uv[0], 'v': uv[1], 'np': np, 'th': thickness, - 'p0e': ', '.join((str(x) for x in p0e)), - 'p1e': ', '.join((str(x) for x in p1e)), - 'p0h': ', '.join((str(x) for x in p0h)), - 'p1h': ', '.join((str(x) for x in p1h)), 'se': '-+'[direction % 2], 'sh': '+-'[direction % 2]} @@ -149,24 +143,16 @@ if ( (s{r} - 1) > {r} && {r} > (s{r} - 1) - ({th} + 1) ) {{ raise Exception('Bad polarity (=0)') code_e = """ - // pml parameters: - const float p0[{th}] = {{ {p0e} }}; - const float p1[{th}] = {{ {p1e} }}; - - Psi_{r}{np}_E{u}[ip] = p0[ir] * Psi_{r}{np}_E{u}[ip] + p1[ir] * (H{v}[i] - H{v}[i-di{r}]); - Psi_{r}{np}_E{v}[ip] = p0[ir] * Psi_{r}{np}_E{v}[ip] + p1[ir] * (H{u}[i] - H{u}[i-di{r}]); + Psi_{r}{np}_E{u}[ip] = p0e[ir] * Psi_{r}{np}_E{u}[ip] + p1e[ir] * (H{v}[i] - H{v}[i-di{r}]); + Psi_{r}{np}_E{v}[ip] = p0e[ir] * Psi_{r}{np}_E{v}[ip] + p1e[ir] * (H{u}[i] - H{u}[i-di{r}]); E{u}[i] {se}= dt / eps{u}[i] * Psi_{r}{np}_E{u}[ip]; E{v}[i] {sh}= dt / eps{v}[i] * Psi_{r}{np}_E{v}[ip]; }} """ code_h = """ - // pml parameters: - const float p0[{th}] = {{ {p0h} }}; - const float p1[{th}] = {{ {p1h} }}; - - Psi_{r}{np}_H{u}[ip] = p0[ir] * Psi_{r}{np}_H{u}[ip] + p1[ir] * (E{v}[i+di{r}] - E{v}[i]); - Psi_{r}{np}_H{v}[ip] = p0[ir] * Psi_{r}{np}_H{v}[ip] + p1[ir] * (E{u}[i+di{r}] - E{u}[i]); + Psi_{r}{np}_H{u}[ip] = p0h[ir] * Psi_{r}{np}_H{u}[ip] + p1h[ir] * (E{v}[i+di{r}] - E{v}[i]); + Psi_{r}{np}_H{v}[ip] = p0h[ir] * Psi_{r}{np}_H{v}[ip] + p1h[ir] * (E{u}[i+di{r}] - E{u}[i]); H{u}[i] {sh}= dt * Psi_{r}{np}_H{u}[ip]; H{v}[i] {se}= dt * Psi_{r}{np}_H{v}[ip]; @@ -180,6 +166,8 @@ if ( (s{r} - 1) > {r} && {r} > (s{r} - 1) - ({th} + 1) ) {{ 'Psi_{r}{np}_E{v}'.format(**vals)], 'psi_H': ['Psi_{r}{np}_H{u}'.format(**vals), 'Psi_{r}{np}_H{v}'.format(**vals)], + 'pe': par(xe), + 'ph': par(xh), } return pml_data diff --git a/fdtd/simulation.py b/fdtd/simulation.py index 227dcb0..6c63273 100644 --- a/fdtd/simulation.py +++ b/fdtd/simulation.py @@ -171,14 +171,19 @@ class Simulation(object): dt_arg = [ctype + ' dt'] arglist_E = [ctype + ' *' + psi for psi in psi_E_names] arglist_H = [ctype + ' *' + psi for psi in psi_H_names] - pml_E_args = ', '.join(E_args + H_args + dt_arg + eps_args + arglist_E) - pml_H_args = ', '.join(E_args + H_args + dt_arg + arglist_H) + pe_args = [ctype + ' *' + s for s in ('p0e', 'p1e')] + ph_args = [ctype + ' *' + s for s in ('p0h', 'p1h')] + pml_E_args = ', '.join(E_args + H_args + dt_arg + eps_args + arglist_E + pe_args) + pml_H_args = ', '.join(E_args + H_args + dt_arg + arglist_H + ph_args) pml_E = ElementwiseKernel(self.context, arguments=pml_E_args, operation=pml_E_source) pml_H = ElementwiseKernel(self.context, arguments=pml_H_args, operation=pml_H_source) - self.cpml_E = lambda e: pml_E(*self.E, *self.H, self.dt, *self.eps, *psi_E, wait_for=e) - self.cpml_H = lambda e: pml_H(*self.E, *self.H, self.dt, *psi_H, wait_for=e) + pe = [pyopencl.array.to_device(self.queue, p) for p in pml_data['pe']] + ph = [pyopencl.array.to_device(self.queue, p) for p in pml_data['ph']] + + self.cpml_E = lambda e: pml_E(*self.E, *self.H, self.dt, *self.eps, *psi_E, *pe, wait_for=e) + self.cpml_H = lambda e: pml_H(*self.E, *self.H, self.dt, *psi_H, *ph, wait_for=e) self.cmpl_psi_E = {k: v for k, v in zip(psi_E_names, psi_E)} self.cmpl_psi_H = {k: v for k, v in zip(psi_H_names, psi_H)} From a6e601b6484406983b4b6b3e36250b9eb3fbe334 Mon Sep 17 00:00:00 2001 From: jan Date: Sat, 16 Jul 2016 17:44:51 -0700 Subject: [PATCH 02/33] Clean up comments --- fdtd.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fdtd.py b/fdtd.py index 0ce560e..e586ee3 100644 --- a/fdtd.py +++ b/fdtd.py @@ -96,7 +96,8 @@ def main(): z_max = 1.6 * a xyz_max = numpy.hstack((xy_max, z_max)) + pml_thickness * dx - # Coordinates of the edges of the cells. The fdtd package can only do square grids at the moment. + # Coordinates of the edges of the cells. + # The fdtd package can only do square grids at the moment. half_edge_coords = [numpy.arange(dx/2, m + dx, step=dx) for m in xyz_max] edge_coords = [numpy.hstack((-h[::-1], h)) for h in half_edge_coords] @@ -139,8 +140,8 @@ def main(): return numpy.sin(w * t0) * numpy.exp(-alpha * t0**2) # #### Run a bunch of iterations #### - # event = sim.whatever([prev_event]) indicates that sim.whatever should be queued immediately and run - # once prev_event is finished. + # event = sim.whatever([prev_event]) indicates that sim.whatever should be queued + # immediately and run once prev_event is finished. output_file = h5py.File('simulation_output.h5', 'w') start = time.perf_counter() for t in range(max_t): From 6c3313d7c985d87ad45f9eed145af4f91646ed19 Mon Sep 17 00:00:00 2001 From: jan Date: Sat, 16 Jul 2016 18:07:54 -0700 Subject: [PATCH 03/33] call overhead still way too big --- fdtd.py | 9 ++++++--- fdtd/base.py | 3 ++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/fdtd.py b/fdtd.py index e586ee3..8e30fd0 100644 --- a/fdtd.py +++ b/fdtd.py @@ -75,7 +75,7 @@ def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern: def main(): max_t = 8000 # number of timesteps - dx = 40 # discretization (nm/cell) + dx = 15 # discretization (nm/cell) pml_thickness = 8 # (number of cells) wl = 1550 # Excitation wavelength and fwhm @@ -109,13 +109,16 @@ def main(): eps=n_slab**2) mask = perturbed_l3(a, r) - grid.draw_polygons(surface_normal=gridlock.Direction.z, + for i in range(5000): + print(i) + grid.draw_polygons(surface_normal=gridlock.Direction.z, center=[0, 0, 0], thickness=2 * th, eps=n_air**2, polygons=mask.as_polygons()) + return - print(grid.shape) + print(grid.shape, numpy.prod(grid.shape)) # #### Create the simulation grid #### sim = Simulation(grid.grids) diff --git a/fdtd/base.py b/fdtd/base.py index 6285fb9..3717a65 100644 --- a/fdtd/base.py +++ b/fdtd/base.py @@ -30,7 +30,8 @@ const int diy = sz; const int diz = 1; """ -# Given a linear index i and shape sx, sy, sz, defines x, y, and z as the 3D indices of the current element (i). +# Given a linear index i and shape sx, sy, sz, defines x, y, and z +# as the 3D indices of the current element (i). xyz_source = """ // Convert linear index to field index (xyz) const int x = i / (sz * sy); From d34c478f1d7122028f9540737de47cbc08b2d3cc Mon Sep 17 00:00:00 2001 From: jan Date: Thu, 1 Sep 2016 14:39:44 -0700 Subject: [PATCH 04/33] Rewrite, with the following features: - Move to jinja2 templates for the opencl code - Combine PML code into the E, H updates for speed - Add Poynting vector calculation code, including precalculation during H update - Use arrays for PML parameters (p0, p1) - Switch to linearized, C-ordered fields (~50% performance boost??) - Added jinja2 and fdfd_tools dependencies --- fdtd.py | 68 ++++++----- fdtd/base.py | 72 ----------- fdtd/boundary.py | 185 ---------------------------- fdtd/kernels/common.cl | 84 +++++++++++++ fdtd/kernels/update_e.cl | 78 ++++++++++++ fdtd/kernels/update_h.cl | 125 +++++++++++++++++++ fdtd/kernels/update_s.cl | 36 ++++++ fdtd/simulation.py | 258 ++++++++++++++++++++++----------------- requirements.txt | 2 + 9 files changed, 506 insertions(+), 402 deletions(-) delete mode 100644 fdtd/base.py delete mode 100644 fdtd/boundary.py create mode 100644 fdtd/kernels/common.cl create mode 100644 fdtd/kernels/update_e.cl create mode 100644 fdtd/kernels/update_h.cl create mode 100644 fdtd/kernels/update_s.cl diff --git a/fdtd.py b/fdtd.py index 0ce560e..ecd16b0 100644 --- a/fdtd.py +++ b/fdtd.py @@ -8,12 +8,17 @@ import sys import time import numpy -import h5py +import lzma +import dill from fdtd.simulation import Simulation from masque import Pattern, shapes import gridlock import pcgen +import fdfd_tools + + +__author__ = 'Jan Petykiewicz' def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern: @@ -75,7 +80,7 @@ def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern: def main(): max_t = 8000 # number of timesteps - dx = 40 # discretization (nm/cell) + dx = 25 # discretization (nm/cell) pml_thickness = 8 # (number of cells) wl = 1550 # Excitation wavelength and fwhm @@ -114,19 +119,9 @@ def main(): eps=n_air**2, polygons=mask.as_polygons()) - print(grid.shape) + print('grid shape: {}'.format(grid.shape)) # #### Create the simulation grid #### - sim = Simulation(grid.grids) - - # Conducting boundaries and pmls in every direction - c_args = [] - pml_args = [] - for d in (0, 1, 2): - for p in (-1, 1): - c_args += [{'direction': d, 'polarity': p}] - pml_args += [{'direction': d, 'polarity': p, 'epsilon_eff': n_slab**2}] - sim.init_conductors(c_args) - sim.init_cpml(pml_args) + sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8) # Source parameters and function w = 2 * numpy.pi * dx / wl @@ -137,31 +132,44 @@ def main(): def field_source(i): t0 = i * sim.dt - delay return numpy.sin(w * t0) * numpy.exp(-alpha * t0**2) + + with open('sources.c', 'w') as f: + f.write(sim.sources['E']) + f.write('\n==========================================\n') + f.write(sim.sources['H']) + if sim.update_S: + f.write('\n==========================================\n') + f.write(sim.sources['S']) # #### Run a bunch of iterations #### # event = sim.whatever([prev_event]) indicates that sim.whatever should be queued immediately and run # once prev_event is finished. - output_file = h5py.File('simulation_output.h5', 'w') start = time.perf_counter() for t in range(max_t): - event = sim.cpml_E([]) - sim.update_E([event]).wait() + sim.update_E([]).wait() - sim.E[1][tuple(grid.shape//2)] += field_source(t) - event = sim.conductor_E([]) - event = sim.cpml_H([event]) - event = sim.update_H([event]) - sim.conductor_H([event]).wait() + ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + numpy.prod(grid.shape) + sim.E[ind] += field_source(t) + e = sim.update_H([]) + if sim.update_S: + e = sim.update_S([e]) + e.wait() - print('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start))) - sys.stdout.flush() + if t % 100 == 0: + print('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start))) + sys.stdout.flush() - # Save field slices - if (t % 20 == 0 and (max_t - t < 1000 or t < 1000)) or t == max_t-1: - print('saving E-field') - for j, f in enumerate(sim.E): - a = f.get() - output_file['/E{}_t{}'.format('xyz'[j], t)] = a[:, :, round(a.shape[2]/2)] + with lzma.open('saved_simulation', 'wb') as f: + def unvec(f): + return fdfd_tools.unvec(f, grid.shape) + d = { + 'grid': grid, + 'E': unvec(sim.E.get()), + 'H': unvec(sim.H.get()), + } + if sim.S is not None: + d['S'] = unvec(sim.S.get()) + dill.dump(d, f) if __name__ == '__main__': main() diff --git a/fdtd/base.py b/fdtd/base.py deleted file mode 100644 index 6285fb9..0000000 --- a/fdtd/base.py +++ /dev/null @@ -1,72 +0,0 @@ -""" -Basic code snippets for opencl FDTD -""" - -from typing import List -import numpy - - -def shape_source(shape: List[int] or numpy.ndarray) -> str: - """ - Defines sx, sy, sz C constants specifying the shape of the grid in each of the 3 dimensions. - - :param shape: [sx, sy, sz] values. - :return: String containing C source. - """ - sxyz = """ -// Field sizes -const int sx = {shape[0]}; -const int sy = {shape[1]}; -const int sz = {shape[2]}; -""".format(shape=shape) - return sxyz - -# Defines dix, diy, diz constants used for stepping in the x, y, z directions in a linear array -# (ie, given Ex[i] referring to position (x, y, z), Ex[i+diy] will refer to position (x, y+1, z)) -dixyz_source = """ -// Convert offset in field xyz to linear index offset -const int dix = sz * sy; -const int diy = sz; -const int diz = 1; -""" - -# Given a linear index i and shape sx, sy, sz, defines x, y, and z as the 3D indices of the current element (i). -xyz_source = """ -// Convert linear index to field index (xyz) -const int x = i / (sz * sy); -const int y = (i - x * sz * sy) / sz; -const int z = (i - y * sz - x * sz * sy); -""" - -# Source code for updating the E field; maxes use of dixyz_source. -maxwell_E_source = """ -// E update equations -Ex[i] += dt / epsx[i] * ((Hz[i] - Hz[i-diy]) - (Hy[i] - Hy[i-diz])); -Ey[i] += dt / epsy[i] * ((Hx[i] - Hx[i-diz]) - (Hz[i] - Hz[i-dix])); -Ez[i] += dt / epsz[i] * ((Hy[i] - Hy[i-dix]) - (Hx[i] - Hx[i-diy])); -""" - -# Source code for updating the H field; maxes use of dixyz_source and assumes mu=0 -maxwell_H_source = """ -// H update equations -Hx[i] -= dt * ((Ez[i+diy] - Ez[i]) - (Ey[i+diz] - Ey[i])); -Hy[i] -= dt * ((Ex[i+diz] - Ex[i]) - (Ez[i+dix] - Ez[i])); -Hz[i] -= dt * ((Ey[i+dix] - Ey[i]) - (Ex[i+diy] - Ex[i])); -""" - - -def type_to_C(float_type: numpy.float32 or numpy.float64) -> str: - """ - Returns a string corresponding to the C equivalent of a numpy type. - Only works for float32 and float64. - - :param float_type: numpy.float32 or numpy.float64 - :return: string containing the corresponding C type (eg. 'double') - """ - if float_type == numpy.float32: - arg_type = 'float' - elif float_type == numpy.float64: - arg_type = 'double' - else: - raise Exception('Unsupported type') - return arg_type diff --git a/fdtd/boundary.py b/fdtd/boundary.py deleted file mode 100644 index bb8dbdf..0000000 --- a/fdtd/boundary.py +++ /dev/null @@ -1,185 +0,0 @@ -from typing import List, Dict -import numpy - - -def conductor(direction: int, - polarity: int, - ) -> List[str]: - """ - Create source code for conducting boundary conditions. - - :param direction: integer in range(3), corresponding to x,y,z. - :param polarity: -1 or 1, specifying eg. a -x or +x boundary. - :return: [E_source, H_source] source code for E and H boundary update steps. - """ - if direction not in range(3): - raise Exception('Invalid direction: {}'.format(direction)) - - if polarity not in (-1, 1): - raise Exception('Invalid polarity: {}'.format(polarity)) - - r = 'xyz'[direction] - uv = 'xyz'.replace(r, '') - - if polarity < 0: - bc_e = """ -if ({r} == 0) {{ - E{r}[i] = 0; - E{u}[i] = E{u}[i+di{r}]; - E{v}[i] = E{v}[i+di{r}]; -}} -""" - bc_h = """ -if ({r} == 0) {{ - H{r}[i] = H{r}[i+di{r}]; - H{u}[i] = 0; - H{v}[i] = 0; -}} -""" - - elif polarity > 0: - bc_e = """ -if ({r} == s{r} - 1) {{ - E{r}[i] = -E{r}[i-2*di{r}]; - E{u}[i] = +E{u}[i-di{r}]; - E{v}[i] = +E{v}[i-di{r}]; -}} else if ({r} == s{r} - 2) {{ - E{r}[i] = 0; -}} -""" - bc_h = """ -if ({r} == s{r} - 1) {{ - H{r}[i] = +H{r}[i-di{r}]; - H{u}[i] = -H{u}[i-2*di{r}]; - H{v}[i] = -H{v}[i-2*di{r}]; -}} else if ({r} == s{r} - 2) {{ - H{u}[i] = 0; - H{v}[i] = 0; -}} -""" - else: - raise Exception() - - replacements = {'r': r, 'u': uv[0], 'v': uv[1]} - return [s.format(**replacements) for s in (bc_e, bc_h)] - - -def cpml(direction: int, - polarity: int, - dt: float, - thickness: int=8, - epsilon_eff: float=1, - ) -> Dict: - """ - Generate source code for complex phase matched layer (cpml) absorbing boundaries. - These are not full boundary conditions and require a conducting boundary to be added - in the same direction as the pml. - - :param direction: integer in range(3), corresponding to x, y, z directions. - :param polarity: -1 or 1, corresponding to eg. -x or +x direction. - :param dt: timestep used by the simulation - :param thickness: Number of cells used by the pml (the grid is NOT expanded to add these cells). Default 8. - :param epsilon_eff: Effective epsilon_r of the pml layer. Default 1. - :return: Dict with entries 'E', 'H' (update equations for E and H) and 'psi_E', 'psi_H' (lists of str, - specifying the field names of the cpml fields used in the E and H update steps. Eg., - Psi_xn_Ex for the complex Ex component for the x- pml.) - """ - if direction not in range(3): - raise Exception('Invalid direction: {}'.format(direction)) - - if polarity not in (-1, 1): - raise Exception('Invalid polarity: {}'.format(polarity)) - - if thickness <= 2: - raise Exception('It would be wise to have a pml with 4+ cells of thickness') - - if epsilon_eff <= 0: - raise Exception('epsilon_eff must be positive') - - m = (3.5, 1) - sigma_max = 0.8 * (m[0] + 1) / numpy.sqrt(epsilon_eff) - alpha_max = 0 # TODO: Decide what to do about non-zero alpha - transverse = numpy.delete(range(3), direction) - - r = 'xyz'[direction] - np = 'nVp'[numpy.sign(polarity)+1] - uv = ['xyz'[i] for i in transverse] - - xe = numpy.arange(1, thickness+1, dtype=float)[::-1] - xh = numpy.arange(1, thickness+1, dtype=float)[::-1] - if polarity > 0: - xe -= 0.5 - elif polarity < 0: - xh -= 0.5 - - def par(x): - sigma = ((x / thickness) ** m[0]) * sigma_max - alpha = ((1 - x / thickness) ** m[1]) * alpha_max - p0 = numpy.exp(-(sigma + alpha) * dt) - p1 = sigma / (sigma + alpha) * (p0 - 1) - return p0, p1 - p0e, p1e = par(xe) - p0h, p1h = par(xh) - - vals = {'r': r, - 'u': uv[0], - 'v': uv[1], - 'np': np, - 'th': thickness, - 'p0e': ', '.join((str(x) for x in p0e)), - 'p1e': ', '.join((str(x) for x in p1e)), - 'p0h': ', '.join((str(x) for x in p0h)), - 'p1h': ', '.join((str(x) for x in p1h)), - 'se': '-+'[direction % 2], - 'sh': '+-'[direction % 2]} - - if polarity < 0: - bounds_if = """ -if ( 0 < {r} && {r} < {th} + 1 ) {{ - const int ir = {r} - 1; // index into pml parameters - const int ip = {v} + {u} * s{v} + ir * s{v} * s{u}; // linear index into Psi -""" - elif polarity > 0: - bounds_if = """ -if ( (s{r} - 1) > {r} && {r} > (s{r} - 1) - ({th} + 1) ) {{ - const int ir = (s{r} - 1) - ({r} + 1); // index into pml parameters - const int ip = {v} + {u} * s{v} + ir * s{v} * s{u}; // linear index into Psi -""" - else: - raise Exception('Bad polarity (=0)') - - code_e = """ - // pml parameters: - const float p0[{th}] = {{ {p0e} }}; - const float p1[{th}] = {{ {p1e} }}; - - Psi_{r}{np}_E{u}[ip] = p0[ir] * Psi_{r}{np}_E{u}[ip] + p1[ir] * (H{v}[i] - H{v}[i-di{r}]); - Psi_{r}{np}_E{v}[ip] = p0[ir] * Psi_{r}{np}_E{v}[ip] + p1[ir] * (H{u}[i] - H{u}[i-di{r}]); - - E{u}[i] {se}= dt / eps{u}[i] * Psi_{r}{np}_E{u}[ip]; - E{v}[i] {sh}= dt / eps{v}[i] * Psi_{r}{np}_E{v}[ip]; -}} -""" - code_h = """ - // pml parameters: - const float p0[{th}] = {{ {p0h} }}; - const float p1[{th}] = {{ {p1h} }}; - - Psi_{r}{np}_H{u}[ip] = p0[ir] * Psi_{r}{np}_H{u}[ip] + p1[ir] * (E{v}[i+di{r}] - E{v}[i]); - Psi_{r}{np}_H{v}[ip] = p0[ir] * Psi_{r}{np}_H{v}[ip] + p1[ir] * (E{u}[i+di{r}] - E{u}[i]); - - H{u}[i] {sh}= dt * Psi_{r}{np}_H{u}[ip]; - H{v}[i] {se}= dt * Psi_{r}{np}_H{v}[ip]; -}} -""" - - pml_data = { - 'E': (bounds_if + code_e).format(**vals), - 'H': (bounds_if + code_h).format(**vals), - 'psi_E': ['Psi_{r}{np}_E{u}'.format(**vals), - 'Psi_{r}{np}_E{v}'.format(**vals)], - 'psi_H': ['Psi_{r}{np}_H{u}'.format(**vals), - 'Psi_{r}{np}_H{v}'.format(**vals)], - } - - return pml_data diff --git a/fdtd/kernels/common.cl b/fdtd/kernels/common.cl new file mode 100644 index 0000000..d203ca7 --- /dev/null +++ b/fdtd/kernels/common.cl @@ -0,0 +1,84 @@ +{# +/* Common code for E, H updates + * + * Template parameters: + * ftype type name (e.g. float or double) + * shape list of 3 ints specifying shape of fields + */ +#} + +typedef {{ftype}} ftype; + + +/* + * Field size info + */ +const size_t sx = {{shape[0]}}; +const size_t sy = {{shape[1]}}; +const size_t sz = {{shape[2]}}; +const size_t field_size = sx * sy * sz; + +//Since we use i to index into Ex[], Ey[], ... rather than E[], do nothing if +// i is outside the bounds of Ex[]. +if (i >= field_size) { + PYOPENCL_ELWISE_CONTINUE; +} + + +/* + * Array indexing + */ +// Given a linear index i and shape (sx, sy, sz), defines x, y, and z +// as the 3D indices of the current element (i). +// (ie, converts linear index [i] to field indices (x, y, z) +const size_t x = i / (sz * sy); +const size_t y = (i - x * sz * sy) / sz; +const size_t z = (i - y * sz - x * sz * sy); + +// Calculate linear index offsets corresponding to offsets in 3D +// (ie, if E[i] <-> E(x, y, z), then E[i + diy] <-> E(x, y + 1, z) +const size_t dix = sz * sy; +const size_t diy = sz; +const size_t diz = 1; + + +/* + * Pointer math + */ +//Pointer offsets into the components of a linearized vector-field +// (eg. Hx = H + XX, where H and Hx are pointers) +const size_t XX = 0; +const size_t YY = field_size; +const size_t ZZ = field_size * 2; + +//Define pointers to vector components of each field (eg. Hx = H + XX) +__global ftype *Ex = E + XX; +__global ftype *Ey = E + YY; +__global ftype *Ez = E + ZZ; + +__global ftype *Hx = H + XX; +__global ftype *Hy = H + YY; +__global ftype *Hz = H + ZZ; + + +/* + * Implement periodic boundary conditions + * + * mx ([m]inus [x]) gives the index offset of the adjacent cell in the minus-x direction. + * In the event that we start at x == 0, we actually want to wrap around and grab the cell + * x_{-1} == (sx - 1) instead, ie. mx = (sx - 1) * dix . + * + * px ([p]lus [x]) gives the index offset of the adjacent cell in the plus-x direction. + * In the event that we start at x == (sx - 1), we actually want to wrap around and grab + * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix . + */ +{% for r in 'xyz' %} +int m{{r}} = -di{{r}}; +int p{{r}} = +di{{r}}; +int wrap_{{r}} = (s{{r}} - 1) * di{{r}}; +if ( {{r}} == 0 ) { + m{{r}} = wrap_{{r}}; +} else if ( {{r}} == s{{r}} - 1 ) { + p{{r}} = -wrap_{{r}}; +} +{% endfor %} diff --git a/fdtd/kernels/update_e.cl b/fdtd/kernels/update_e.cl new file mode 100644 index 0000000..2728b46 --- /dev/null +++ b/fdtd/kernels/update_e.cl @@ -0,0 +1,78 @@ +/* + * Update E-field, including any PMLs. + * + * Template parameters: + * common_header: Rendered contents of common.cl + * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities + * pml_thickness: Number of cells (integer) + * + * OpenCL args: + * E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E] + */ + +{{common_header}} + +//////////////////////////////////////////////////////////////////////////// + +__global ftype *epsx = eps + XX; +__global ftype *epsy = eps + YY; +__global ftype *epsz = eps + ZZ; + +{% if pmls -%} +const int pml_thickness = {{pml_thickness}}; +{%- endif %} + +/* + * Precalclate derivatives + */ +ftype dHxy = Hx[i] - Hx[i + my]; +ftype dHxz = Hx[i] - Hx[i + mz]; + +ftype dHyx = Hy[i] - Hy[i + mx]; +ftype dHyz = Hy[i] - Hy[i + mz]; + +ftype dHzx = Hz[i] - Hz[i + mx]; +ftype dHzy = Hz[i] - Hz[i + my]; + +/* + * PML Update + */ +// PML effects on E +ftype pExi = 0; +ftype pEyi = 0; +ftype pEzi = 0; + +{% for r, p in pmls -%} + {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} + {%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%} + {%- if r != 'y' -%} + {%- set se, sh = '-', '+' -%} + {%- else -%} + {%- set se, sh = '+', '-' -%} + {%- endif -%} + + {%- if p == 'n' %} + +if ( {{r}} < pml_thickness ) { + const size_t ir = {{r}}; // index into pml parameters + + {%- elif p == 'p' %} + +if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) { + const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters + + {%- endif %} + const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi + {{psi ~ u}}[ip] = p0e{{p}}[ir] * {{psi ~ u}}[ip] + p1e{{p}}[ir] * dH{{v ~ r}}; + {{psi ~ v}}[ip] = p0e{{p}}[ir] * {{psi ~ v}}[ip] + p1e{{p}}[ir] * dH{{u ~ r}}; + pE{{u}}i {{se}}= {{psi ~ u}}[ip]; + pE{{v}}i {{sh}}= {{psi ~ v}}[ip]; +} +{%- endfor %} + +/* + * Update E + */ +Ex[i] += dt / epsx[i] * (dHzy - dHyz + pExi); +Ey[i] += dt / epsy[i] * (dHxz - dHzx + pEyi); +Ez[i] += dt / epsz[i] * (dHyx - dHxy + pEzi); diff --git a/fdtd/kernels/update_h.cl b/fdtd/kernels/update_h.cl new file mode 100644 index 0000000..e093cb9 --- /dev/null +++ b/fdtd/kernels/update_h.cl @@ -0,0 +1,125 @@ +/* + * Update H-field, including any PMLs. + * Also precalculate values for poynting vector if necessary. + * + * Template parameters: + * common_header: Rendered contents of common.cl + * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities + * pml_thickness: Number of cells (integer) + * do_poynting: Whether to precalculate poynting vector components (boolean) + * + * OpenCL args: + * E, H, dt, [p{01}h{np}, Psi_{xyz}{np}_H], [oS] + */ + +{{common_header}} + +//////////////////////////////////////////////////////////////////////////// + +{% if pmls -%} +const int pml_thickness = {{pml_thickness}}; +{%- endif %} + +/* + * Precalculate derivatives + */ +ftype dExy = Ex[i + py] - Ex[i]; +ftype dExz = Ex[i + pz] - Ex[i]; + +ftype dEyx = Ey[i + px] - Ey[i]; +ftype dEyz = Ey[i + pz] - Ey[i]; + +ftype dEzx = Ez[i + px] - Ez[i]; +ftype dEzy = Ez[i + py] - Ez[i]; + +{%- if do_poynting %} + + +/* + * Precalculate averaged E + */ +ftype aExy = Ex[i + py] + Ex[i]; +ftype aExz = Ex[i + pz] + Ex[i]; + +ftype aEyx = Ey[i + px] + Ey[i]; +ftype aEyz = Ey[i + pz] + Ey[i]; + +ftype aEzx = Ez[i + px] + Ez[i]; +ftype aEzy = Ez[i + py] + Ez[i]; +{%- endif %} + + +/* + * PML Update + */ +// PML contributions to H +ftype pHxi = 0; +ftype pHyi = 0; +ftype pHzi = 0; + +{%- for r, p in pmls -%} + {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} + {%- set psi = 'Psi_' ~ r ~ p ~ '_H' -%} + {%- if r != 'y' -%} + {%- set se, sh = '-', '+' -%} + {%- else -%} + {%- set se, sh = '+', '-' -%} + {%- endif -%} + + {%- if p == 'n' %} + +if ( {{r}} < pml_thickness ) { + const size_t ir = {{r}}; // index into pml parameters + + {%- elif p == 'p' %} + +if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) { + const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters + + {%- endif %} + const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi + {{psi ~ u}}[ip] = p0h{{p}}[ir] * {{psi ~ u}}[ip] + p1h{{p}}[ir] * dE{{v ~ r}}; + {{psi ~ v}}[ip] = p0h{{p}}[ir] * {{psi ~ v}}[ip] + p1h{{p}}[ir] * dE{{u ~ r}}; + pH{{u}}i {{sh}}= {{psi ~ u}}[ip]; + pH{{v}}i {{se}}= {{psi ~ v}}[ip]; +} +{%- endfor %} + +/* + * Update H + */ +{% if do_poynting -%} +// Save old H for averaging +ftype Hx_old = Hx[i]; +ftype Hy_old = Hy[i]; +ftype Hz_old = Hz[i]; +{%- endif %} + +// H update equations +Hx[i] -= dt * (dEzy - dEyz + pHxi); +Hy[i] -= dt * (dExz - dEzx + pHyi); +Hz[i] -= dt * (dEyx - dExy + pHzi); + +{% if do_poynting -%} +// Average H across timesteps +ftype aHxt = Hx[i] + Hx_old; +ftype aHyt = Hy[i] + Hy_old; +ftype aHzt = Hz[i] + Hz_old; + +/* + * Calculate unscaled S components at H locations + */ +__global ftype *oSxy = oS + 0 * field_size; +__global ftype *oSyz = oS + 1 * field_size; +__global ftype *oSzx = oS + 2 * field_size; +__global ftype *oSxz = oS + 3 * field_size; +__global ftype *oSyx = oS + 4 * field_size; +__global ftype *oSzy = oS + 5 * field_size; + +oSxy[i] = aEyx * aHzt; +oSxz[i] = -aEzx * aHyt; +oSyz[i] = aEzy * aHxt; +oSyx[i] = -aExy * aHzt; +oSzx[i] = aExz * aHyt; +oSzy[i] = -aEyz * aHxt; +{%- endif -%} diff --git a/fdtd/kernels/update_s.cl b/fdtd/kernels/update_s.cl new file mode 100644 index 0000000..0310411 --- /dev/null +++ b/fdtd/kernels/update_s.cl @@ -0,0 +1,36 @@ +/* + * Update E-field, including any PMLs. + * + * Template parameters: + * common_header: Rendered contents of common.cl + * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities + * pml_thickness: Number of cells (integer) + * + * OpenCL args: + * E, H, dt, S, oS + */ + +{{common_header}} + +////////////////////////////////////////////////////////////////////// + + +/* + * Calculate S from oS (pre-calculated components) + */ +__global ftype *Sx = S + XX; +__global ftype *Sy = S + YY; +__global ftype *Sz = S + ZZ; + +// Use unscaled S components from H locations +__global ftype *oSxy = oS + 0 * field_size; +__global ftype *oSyz = oS + 1 * field_size; +__global ftype *oSzx = oS + 2 * field_size; +__global ftype *oSxz = oS + 3 * field_size; +__global ftype *oSyx = oS + 4 * field_size; +__global ftype *oSzy = oS + 5 * field_size; + +ftype s_factor = dt * 0.125; +Sx[i] = (oSxy[i] + oSxz[i] + oSxy[i + my] + oSxz[i + mz]) * s_factor; +Sy[i] = (oSyz[i] + oSyx[i] + oSyz[i + mz] + oSyx[i + mx]) * s_factor; +Sz[i] = (oSzx[i] + oSzy[i] + oSzx[i + mx] + oSzy[i + my]) * s_factor; diff --git a/fdtd/simulation.py b/fdtd/simulation.py index 227dcb0..3dd9f62 100644 --- a/fdtd/simulation.py +++ b/fdtd/simulation.py @@ -3,15 +3,23 @@ Class for constructing and holding the basic FDTD operations and fields """ from typing import List, Dict, Callable +from collections import OrderedDict import numpy +import jinja2 import warnings import pyopencl import pyopencl.array from pyopencl.elementwise import ElementwiseKernel -from . import boundary, base -from .base import type_to_C +from fdfd_tools import vec + + +__author__ = 'Jan Petykiewicz' + + +# Create jinja2 env on module load +jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels')) class Simulation(object): @@ -20,6 +28,7 @@ class Simulation(object): """ E = None # type: List[pyopencl.array.Array] H = None # type: List[pyopencl.array.Array] + S = None # type: List[pyopencl.array.Array] eps = None # type: List[pyopencl.array.Array] dt = None # type: float @@ -30,24 +39,20 @@ class Simulation(object): update_E = None # type: Callable[[],pyopencl.Event] update_H = None # type: Callable[[],pyopencl.Event] - - conductor_E = None # type: Callable[[],pyopencl.Event] - conductor_H = None # type: Callable[[],pyopencl.Event] - - cpml_E = None # type: Callable[[],pyopencl.Event] - cpml_H = None # type: Callable[[],pyopencl.Event] - - cpml_psi_E = None # type: Dict[str, pyopencl.array.Array] - cpml_psi_H = None # type: Dict[str, pyopencl.array.Array] + update_S = None # type: Callable[[],pyopencl.Event] + sources = None # type: Dict[str, str] def __init__(self, epsilon: List[numpy.ndarray], - dt: float=.99/numpy.sqrt(3), - initial_E: List[numpy.ndarray]=None, - initial_H: List[numpy.ndarray]=None, - context: pyopencl.Context=None, - queue: pyopencl.CommandQueue=None, - float_type: numpy.float32 or numpy.float64=numpy.float32): + dt: float = .99/numpy.sqrt(3), + initial_E: List[numpy.ndarray] = None, + initial_H: List[numpy.ndarray] = None, + context: pyopencl.Context = None, + queue: pyopencl.CommandQueue = None, + float_type: numpy.float32 or numpy.float64 = numpy.float32, + pml_thickness: int = 10, + pmls: List[List[str]] = None, + do_poynting: bool = True): """ Initialize the simulation. @@ -67,7 +72,7 @@ class Simulation(object): Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon]) if context is None: - self.context = pyopencl.create_some_context(False) + self.context = pyopencl.create_some_context() else: self.context = context @@ -84,126 +89,149 @@ class Simulation(object): self.dt = dt self.arg_type = float_type - - self.eps = [pyopencl.array.to_device(self.queue, e.astype(float_type)) for e in epsilon] + self.sources = {} + self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(float_type)) if initial_E is None: - self.E = [pyopencl.array.zeros_like(self.eps[0]) for _ in range(3)] + self.E = pyopencl.array.zeros_like(self.eps) else: if len(initial_E) != 3: Exception('Initial_E must be a list of length 3') if not all((E.shape == epsilon[0].shape for E in initial_E)): Exception('Initial_E list elements must have same shape as epsilon elements') - self.E = [pyopencl.array.to_device(self.queue, E.astype(float_type)) for E in initial_E] + self.E = pyopencl.array.to_device(self.queue, vec(E).astype(float_type)) if initial_H is None: - self.H = [pyopencl.array.zeros_like(self.eps[0]) for _ in range(3)] + self.H = pyopencl.array.zeros_like(self.eps) else: if len(initial_H) != 3: Exception('Initial_H must be a list of length 3') if not all((H.shape == epsilon[0].shape for H in initial_H)): Exception('Initial_H list elements must have same shape as epsilon elements') - self.H = [pyopencl.array.to_device(self.queue, H.astype(float_type)) for H in initial_H] + self.H = pyopencl.array.to_device(self.queue, vec(H).astype(float_type)) + + if pmls is None: + pmls = [[d, p] for d in 'xyz' for p in 'np'] ctype = type_to_C(self.arg_type) - E_args = [ctype + ' *E' + c for c in 'xyz'] - H_args = [ctype + ' *H' + c for c in 'xyz'] - eps_args = [ctype + ' *eps' + c for c in 'xyz'] - dt_arg = [ctype + ' dt'] + def ptr(arg: str) -> str: + return ctype + ' *' + arg - sxyz = base.shape_source(epsilon[0].shape) - E_source = sxyz + base.dixyz_source + base.maxwell_E_source - H_source = sxyz + base.dixyz_source + base.maxwell_H_source + base_fields = OrderedDict() + base_fields[ptr('E')] = self.E + base_fields[ptr('H')] = self.H + base_fields[ctype + ' dt'] = self.dt + eps_field = OrderedDict() + eps_field[ptr('eps')] = self.eps + + common_source = jinja_env.get_template('common.cl').render( + ftype=ctype, + shape=epsilon[0].shape, + ) + jinja_args = { + 'common_header': common_source, + 'pml_thickness': pml_thickness, + 'pmls': pmls, + 'do_poynting': do_poynting, + } + E_source = jinja_env.get_template('update_e.cl').render(**jinja_args) + H_source = jinja_env.get_template('update_h.cl').render(**jinja_args) + + self.sources['E'] = E_source + self.sources['H'] = H_source + + if do_poynting: + S_source = jinja_env.get_template('update_s.cl').render(**jinja_args) + self.sources['S'] = S_source + + self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=float_type) + self.S = pyopencl.array.zeros_like(self.E) + S_fields = OrderedDict() + S_fields[ptr('oS')] = self.oS + S_fields[ptr('S')] = self.S + else: + S_fields = OrderedDict() + + ''' + PML + ''' + m = (3.5, 1) + sigma_max = 0.8 * (m[0] + 1) / numpy.sqrt(1.0) # TODO: epsilon_eff (not 1.0) + alpha_max = 0 # TODO: Decide what to do about non-zero alpha + + def par(x): + sigma = ((x / pml_thickness) ** m[0]) * sigma_max + alpha = ((1 - x / pml_thickness) ** m[1]) * alpha_max + p0 = numpy.exp(-(sigma + alpha) * dt) + p1 = sigma / (sigma + alpha) * (p0 - 1) + return p0, p1 + + xen, xep, xhn, xhp = (numpy.arange(1, pml_thickness + 1, dtype=float_type)[::-1] for _ in range(4)) + xep -= 0.5 + xhn -= 0.5 + + pml_p_names = [['p' + a + eh + np for np in 'np' for a in '01'] for eh in 'eh'] + pml_e_fields = OrderedDict() + pml_h_fields = OrderedDict() + for ne, nh, pe, ph in zip(*pml_p_names, par(xen) + par(xep), par(xhn) + par(xhp)): + pml_e_fields[ptr(ne)] = pyopencl.array.to_device(self.queue, pe) + pml_h_fields[ptr(nh)] = pyopencl.array.to_device(self.queue, ph) + + for pml in pmls: + uv = 'xyz'.replace(pml[0], '') + psi_base = 'Psi_' + ''.join(pml) + '_' + psi_names = [[psi_base + eh + c for c in uv] for eh in 'EH'] + + psi_shape = list(epsilon[0].shape) + psi_shape['xyz'.find(pml[0])] = pml_thickness + + for ne, nh in zip(*psi_names): + pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type) + pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type) + + self.pml_e_fields = pml_e_fields + self.pml_h_fields = pml_h_fields + + + ''' + Create operations + ''' + E_args = OrderedDict() + [E_args.update(d) for d in (base_fields, eps_field, pml_e_fields)] E_update = ElementwiseKernel(self.context, operation=E_source, - arguments=', '.join(E_args + H_args + dt_arg + eps_args)) + arguments=', '.join(E_args.keys())) + H_args = OrderedDict() + [H_args.update(d) for d in (base_fields, pml_h_fields, S_fields)] H_update = ElementwiseKernel(self.context, operation=H_source, - arguments=', '.join(E_args + H_args + dt_arg)) + arguments=', '.join(H_args.keys())) + self.update_E = lambda e: E_update(*E_args.values(), wait_for=e) + self.update_H = lambda e: H_update(*H_args.values(), wait_for=e) - self.update_E = lambda e: E_update(*self.E, *self.H, self.dt, *self.eps, wait_for=e) - self.update_H = lambda e: H_update(*self.E, *self.H, self.dt, wait_for=e) + if do_poynting: + S_args = OrderedDict() + [S_args.update(d) for d in (base_fields, S_fields)] + S_update = ElementwiseKernel(self.context, operation=S_source, + arguments=', '.join(S_args.keys())) - def init_cpml(self, pml_args: List[Dict]): - """ - Initialize absorbing layers (cpml: complex phase matched layer). PMLs are not actual - boundary conditions, so you should add a conducting boundary (.init_conductors()) for - all directions in which you add PMLs. - Allows use of self.cpml_E(events) and self.cpml_H(events). - All necessary additional fields are created on the opencl device. + self.update_S = lambda e: S_update(*S_args.values(), wait_for=e) - :param pml_args: A list containing dictionaries which are passed to .boundary.cpml(...). - The dt argument is set automatically, but the others must be passed in each entry - of pml_args. - """ - sxyz = base.shape_source(self.eps[0].shape) - # Prepare per-iteration constants for later use - pml_E_source = sxyz + base.dixyz_source + base.xyz_source - pml_H_source = sxyz + base.dixyz_source + base.xyz_source +def type_to_C(float_type) -> str: + """ + Returns a string corresponding to the C equivalent of a numpy type. + Only works for float16, float32, float64. - psi_E = [] - psi_H = [] - psi_E_names = [] - psi_H_names = [] - for arg_set in pml_args: - pml_data = boundary.cpml(dt=self.dt, **arg_set) - - pml_E_source += pml_data['E'] - pml_H_source += pml_data['H'] - - ti = numpy.delete(range(3), arg_set['direction']) - trans = [self.eps[0].shape[i] for i in ti] - psi_shape = (8, trans[0], trans[1]) - - psi_E += [pyopencl.array.zeros(self.queue, psi_shape, dtype=self.arg_type) - for _ in pml_data['psi_E']] - psi_H += [pyopencl.array.zeros(self.queue, psi_shape, dtype=self.arg_type) - for _ in pml_data['psi_H']] - - psi_E_names += pml_data['psi_E'] - psi_H_names += pml_data['psi_H'] - - ctype = type_to_C(self.arg_type) - E_args = [ctype + ' *E' + c for c in 'xyz'] - H_args = [ctype + ' *H' + c for c in 'xyz'] - eps_args = [ctype + ' *eps' + c for c in 'xyz'] - dt_arg = [ctype + ' dt'] - arglist_E = [ctype + ' *' + psi for psi in psi_E_names] - arglist_H = [ctype + ' *' + psi for psi in psi_H_names] - pml_E_args = ', '.join(E_args + H_args + dt_arg + eps_args + arglist_E) - pml_H_args = ', '.join(E_args + H_args + dt_arg + arglist_H) - - pml_E = ElementwiseKernel(self.context, arguments=pml_E_args, operation=pml_E_source) - pml_H = ElementwiseKernel(self.context, arguments=pml_H_args, operation=pml_H_source) - - self.cpml_E = lambda e: pml_E(*self.E, *self.H, self.dt, *self.eps, *psi_E, wait_for=e) - self.cpml_H = lambda e: pml_H(*self.E, *self.H, self.dt, *psi_H, wait_for=e) - self.cmpl_psi_E = {k: v for k, v in zip(psi_E_names, psi_E)} - self.cmpl_psi_H = {k: v for k, v in zip(psi_H_names, psi_H)} - - def init_conductors(self, conductor_args: List[Dict]): - """ - Initialize reflecting boundary conditions. - Allows use of self.conductor_E(events) and self.conductor_H(events). - - :param conductor_args: List of dictionaries with which to call .boundary.conductor(...). - """ - - sxyz = base.shape_source(self.eps[0].shape) - - # Prepare per-iteration constants - bc_E_source = sxyz + base.dixyz_source + base.xyz_source - bc_H_source = sxyz + base.dixyz_source + base.xyz_source - for arg_set in conductor_args: - [e, h] = boundary.conductor(**arg_set) - bc_E_source += e - bc_H_source += h - - E_args = [type_to_C(self.arg_type) + ' *E' + c for c in 'xyz'] - H_args = [type_to_C(self.arg_type) + ' *H' + c for c in 'xyz'] - bc_E = ElementwiseKernel(self.context, arguments=E_args, operation=bc_E_source) - bc_H = ElementwiseKernel(self.context, arguments=H_args, operation=bc_H_source) - - self.conductor_E = lambda e: bc_E(*self.E, wait_for=e) - self.conductor_H = lambda e: bc_H(*self.H, wait_for=e) + :param float_type: e.g. numpy.float32 + :return: string containing the corresponding C type (eg. 'double') + """ + if float_type == numpy.float16: + arg_type = 'half' + elif float_type == numpy.float32: + arg_type = 'float' + elif float_type == numpy.float64: + arg_type = 'double' + else: + raise Exception('Unsupported type') + return arg_type diff --git a/requirements.txt b/requirements.txt index 6121670..6ed651b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,8 @@ numpy h5py pyopencl +jinja2 -e git+https://mpxd.net/gogs/jan/float_raster.git@release#egg=float_raster -e git+https://mpxd.net/gogs/jan/gridlock.git@release#egg=gridlock -e git+https://mpxd.net/gogs/jan/masque.git@release#egg=masque +-e git+https://mpxd.net/gogs/jan/fdfd_tools.git@master#egg=fdfd_tools From c1750e9fde087ad26ecad5434380bbd1f7e7be28 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Wed, 29 Mar 2017 01:09:21 -0700 Subject: [PATCH 05/33] Update readme --- README.md | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 5da2b6b..fbc972b 100644 --- a/README.md +++ b/README.md @@ -5,28 +5,32 @@ electromagnetic simulations on parallel compute hardware (mainly GPUs). **Performance** highly depends on what hardware you have available: * A 395x345x73 cell simulation (~10 million points, 8-cell absorbing boundaries) - runs at around 42 iterations/sec. on my Nvidia GTX 580. -* On my laptop (Nvidia 940M) the same simulation achieves ~8 iterations/sec. + runs at around 91 iterations/sec. on my AMD RX480. +* On an Nvidia GTX 580, it runs at 66 iterations/sec +* On my laptop (Nvidia 940M) the same simulation achieves ~12 iterations/sec. * An L3 photonic crystal cavity ringdown simulation (1550nm source, 40nm - discretization, 8000 steps) takes about 5 minutes on my laptop. + discretization, 8000 steps) takes about 3 minutes on my laptop. **Capabilities** are currently pretty minimal: * Absorbing boundaries (CPML) -* Conducting boundaries (PMC) +* Perfect electrical conductors (PECs; to use set epsilon to inf) * Anisotropic media (eps_xx, eps_yy, eps_zz, mu_xx, ...) * Direct access to fields (eg., you can trivially add a soft or hard - current source with just sim.E[1] += sin(f0 * t), or save any portion + current source with just sim.E[ind] += sin(f0 * t), or save any portion of a field to a file) + ## Installation **Requirements:** * python 3 (written and tested with 3.5) * numpy * pyopencl -* h5py (for file output) +* jinja2 +* dill (for file output) * [gridlock](https://mpxd.net/gogs/jan/gridlock) * [masque](https://mpxd.net/gogs/jan/masque) +* [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools) To get the code, just clone this repository: ```bash From 97d9901e4b0356d4a8ab77d65013e7f5b73bc009 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Sat, 12 Aug 2017 19:20:29 -0700 Subject: [PATCH 06/33] Use logging package for output --- fdtd.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fdtd.py b/fdtd.py index bbdabec..961bc0e 100644 --- a/fdtd.py +++ b/fdtd.py @@ -6,6 +6,7 @@ See main() for simulation setup. import sys import time +import logging import numpy import lzma @@ -20,6 +21,9 @@ import fdfd_tools __author__ = 'Jan Petykiewicz' +logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger(__name__) + def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern: """ @@ -120,7 +124,7 @@ def main(): eps=n_air**2, polygons=mask.as_polygons()) - print('grid shape: {}'.format(grid.shape)) + logger.info('grid shape: {}'.format(grid.shape)) # #### Create the simulation grid #### sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8) @@ -157,7 +161,7 @@ def main(): e.wait() if t % 100 == 0: - print('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start))) + logger.info('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start))) sys.stdout.flush() with lzma.open('saved_simulation', 'wb') as f: From 0d91f0d43efba28e91dc1a88b5f3acab9294b799 Mon Sep 17 00:00:00 2001 From: jan Date: Mon, 14 Aug 2017 15:41:20 -0700 Subject: [PATCH 07/33] rename lib --- fdtd.py | 7 ++++--- fdtd/__init__.py | 0 opencl_fdtd/__init__.py | 1 + {fdtd => opencl_fdtd}/kernels/common.cl | 0 {fdtd => opencl_fdtd}/kernels/update_e.cl | 0 {fdtd => opencl_fdtd}/kernels/update_h.cl | 0 {fdtd => opencl_fdtd}/kernels/update_s.cl | 0 {fdtd => opencl_fdtd}/simulation.py | 6 +++--- 8 files changed, 8 insertions(+), 6 deletions(-) delete mode 100644 fdtd/__init__.py create mode 100644 opencl_fdtd/__init__.py rename {fdtd => opencl_fdtd}/kernels/common.cl (100%) rename {fdtd => opencl_fdtd}/kernels/update_e.cl (100%) rename {fdtd => opencl_fdtd}/kernels/update_h.cl (100%) rename {fdtd => opencl_fdtd}/kernels/update_s.cl (100%) rename {fdtd => opencl_fdtd}/simulation.py (99%) diff --git a/fdtd.py b/fdtd.py index bbdabec..13af0fd 100644 --- a/fdtd.py +++ b/fdtd.py @@ -8,10 +8,10 @@ import sys import time import numpy -import lzma +import lzma import dill -from fdtd.simulation import Simulation +from opencl_fdtd import Simulation from masque import Pattern, shapes import gridlock import pcgen @@ -133,7 +133,7 @@ def main(): def field_source(i): t0 = i * sim.dt - delay return numpy.sin(w * t0) * numpy.exp(-alpha * t0**2) - + with open('sources.c', 'w') as f: f.write(sim.sources['E']) f.write('\n==========================================\n') @@ -172,5 +172,6 @@ def main(): d['S'] = unvec(sim.S.get()) dill.dump(d, f) + if __name__ == '__main__': main() diff --git a/fdtd/__init__.py b/fdtd/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/opencl_fdtd/__init__.py b/opencl_fdtd/__init__.py new file mode 100644 index 0000000..e3e95b4 --- /dev/null +++ b/opencl_fdtd/__init__.py @@ -0,0 +1 @@ +from .simulation import Simulation, type_to_C diff --git a/fdtd/kernels/common.cl b/opencl_fdtd/kernels/common.cl similarity index 100% rename from fdtd/kernels/common.cl rename to opencl_fdtd/kernels/common.cl diff --git a/fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl similarity index 100% rename from fdtd/kernels/update_e.cl rename to opencl_fdtd/kernels/update_e.cl diff --git a/fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl similarity index 100% rename from fdtd/kernels/update_h.cl rename to opencl_fdtd/kernels/update_h.cl diff --git a/fdtd/kernels/update_s.cl b/opencl_fdtd/kernels/update_s.cl similarity index 100% rename from fdtd/kernels/update_s.cl rename to opencl_fdtd/kernels/update_s.cl diff --git a/fdtd/simulation.py b/opencl_fdtd/simulation.py similarity index 99% rename from fdtd/simulation.py rename to opencl_fdtd/simulation.py index f1a8dec..77a95df 100644 --- a/fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -152,7 +152,7 @@ class Simulation(object): S_fields[ptr('oS')] = self.oS S_fields[ptr('S')] = self.S else: - S_fields = OrderedDict() + S_fields = OrderedDict() ''' PML @@ -167,7 +167,7 @@ class Simulation(object): p0 = numpy.exp(-(sigma + alpha) * dt) p1 = sigma / (sigma + alpha) * (p0 - 1) return p0, p1 - + xen, xep, xhn, xhp = (numpy.arange(1, pml_thickness + 1, dtype=float_type)[::-1] for _ in range(4)) xep -= 0.5 xhn -= 0.5 @@ -190,7 +190,7 @@ class Simulation(object): for ne, nh in zip(*psi_names): pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type) pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type) - + self.pml_e_fields = pml_e_fields self.pml_h_fields = pml_h_fields From a85f5477490f8f175dc42e2e61a67026b4c9063a Mon Sep 17 00:00:00 2001 From: jan Date: Thu, 24 Aug 2017 11:28:03 -0700 Subject: [PATCH 08/33] doc updates --- README.md | 4 ++- opencl_fdtd/kernels/update_e.cl | 4 +-- opencl_fdtd/kernels/update_h.cl | 4 +-- opencl_fdtd/kernels/update_s.cl | 8 ++--- opencl_fdtd/simulation.py | 52 ++++++++++++++++++++++++++++++--- 5 files changed, 59 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index fbc972b..26d5acd 100644 --- a/README.md +++ b/README.md @@ -27,10 +27,12 @@ electromagnetic simulations on parallel compute hardware (mainly GPUs). * numpy * pyopencl * jinja2 +* [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools) + +Optional (used for examples): * dill (for file output) * [gridlock](https://mpxd.net/gogs/jan/gridlock) * [masque](https://mpxd.net/gogs/jan/masque) -* [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools) To get the code, just clone this repository: ```bash diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl index 2728b46..4bda993 100644 --- a/opencl_fdtd/kernels/update_e.cl +++ b/opencl_fdtd/kernels/update_e.cl @@ -1,11 +1,11 @@ /* * Update E-field, including any PMLs. - * + * * Template parameters: * common_header: Rendered contents of common.cl * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities * pml_thickness: Number of cells (integer) - * + * * OpenCL args: * E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E] */ diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl index e093cb9..8519b51 100644 --- a/opencl_fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -1,12 +1,12 @@ /* * Update H-field, including any PMLs. * Also precalculate values for poynting vector if necessary. - * + * * Template parameters: * common_header: Rendered contents of common.cl * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities * pml_thickness: Number of cells (integer) - * do_poynting: Whether to precalculate poynting vector components (boolean) + * do_poynting: Whether to precalculate poynting vector components (boolean) * * OpenCL args: * E, H, dt, [p{01}h{np}, Psi_{xyz}{np}_H], [oS] diff --git a/opencl_fdtd/kernels/update_s.cl b/opencl_fdtd/kernels/update_s.cl index 0310411..94e061e 100644 --- a/opencl_fdtd/kernels/update_s.cl +++ b/opencl_fdtd/kernels/update_s.cl @@ -1,11 +1,11 @@ /* * Update E-field, including any PMLs. - * + * * Template parameters: * common_header: Rendered contents of common.cl * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities * pml_thickness: Number of cells (integer) - * + * * OpenCL args: * E, H, dt, S, oS */ @@ -17,12 +17,12 @@ /* * Calculate S from oS (pre-calculated components) - */ + */ __global ftype *Sx = S + XX; __global ftype *Sy = S + YY; __global ftype *Sz = S + ZZ; -// Use unscaled S components from H locations +// Use unscaled S components from H locations __global ftype *oSxy = oS + 0 * field_size; __global ftype *oSyz = oS + 1 * field_size; __global ftype *oSzx = oS + 2 * field_size; diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 77a95df..7a49f39 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -25,6 +25,35 @@ jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels')) class Simulation(object): """ Constructs and holds the basic FDTD operations and related fields + + After constructing this object, call the (update_E, update_H, update_S) members + to perform FDTD updates on the stored (E, H, S) fields: + + sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8) + with open('sources.c', 'w') as f: + f.write('{}'.format(sim.sources)) + + for t in range(max_t): + sim.update_E([]).wait() + + # Find the linear index for the center point, for Ey + ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + \ + numpy.prod(grid.shape) * 1 + # Perturb the field (i.e., add a soft current source) + sim.E[ind] += numpy.sin(omega * t * sim.dt) + event = sim.update_H([]) + if sim.update_S: + event = sim.update_S([event]) + event.wait() + + with lzma.open('saved_simulation', 'wb') as f: + dill.dump(fdfd_tools.unvec(sim.E.get(), grid.shape), f) + + Code in the form + event2 = sim.update_H([event0, event1]) + indicates that the update_H operation should be prepared immediately, but wait for + event0 and event1 to occur (i.e. previous operations to finish) before starting execution. + event2 can then be used to prepare further operations to be run after update_H. """ E = None # type: List[pyopencl.array.Array] H = None # type: List[pyopencl.array.Array] @@ -37,9 +66,9 @@ class Simulation(object): context = None # type: pyopencl.Context queue = None # type: pyopencl.CommandQueue - update_E = None # type: Callable[[],pyopencl.Event] - update_H = None # type: Callable[[],pyopencl.Event] - update_S = None # type: Callable[[],pyopencl.Event] + update_E = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event] + update_H = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event] + update_S = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event] sources = None # type: Dict[str, str] def __init__(self, @@ -50,8 +79,8 @@ class Simulation(object): context: pyopencl.Context = None, queue: pyopencl.CommandQueue = None, float_type: numpy.float32 or numpy.float64 = numpy.float32, - pml_thickness: int = 10, pmls: List[List[str]] = None, + pml_thickness: int = 10, do_poynting: bool = True): """ Initialize the simulation. @@ -64,6 +93,21 @@ class Simulation(object): :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called. :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called. :param float_type: numpy.float32 or numpy.float64. Default numpy.float32. + :param pmls: List of [axis, direction] pairs which specify simluation boundaries to be + 'coated' with a PML (absorbing layer). Axis should be one of 'x', 'y', 'z', and + direction should be one of 'n', 'p' (i.e., negative, positive). + Default is to apply PMLs to all six boundaries. + :param pml_thickness: Thickness of any PMLs, in number of grid cells. Default 10. + :param do_poynting: If true, enables calculation of the poynting vector, S. + Poynting vector calculation adds the following computational burdens: + * During update_H, ~6 extra additions/cell are performed in order to spatially + average E and temporally average H. These quantities are multiplied + (6 multiplications/cell) and then stored (6 writes/cell, cache-friendly). + * update_S performs a discrete cross product using the precalculated products + from update_H. This is not nice to the cache and similar to e.g. update_E + in complexity. + * GPU memory requirements are approximately doubled, since S and the intermediate + products must be stored. """ if len(epsilon) != 3: From d02cd18403e8c07b8c3de7a10b335fec180a148a Mon Sep 17 00:00:00 2001 From: jan Date: Sun, 1 Oct 2017 12:34:11 -0700 Subject: [PATCH 09/33] improve pml specification --- fdtd.py | 4 +- opencl_fdtd/kernels/update_e.cl | 17 +++---- opencl_fdtd/kernels/update_h.cl | 20 ++++---- opencl_fdtd/simulation.py | 82 +++++++++++++++++++-------------- 4 files changed, 69 insertions(+), 54 deletions(-) diff --git a/fdtd.py b/fdtd.py index 5d0f4c3..08606c6 100644 --- a/fdtd.py +++ b/fdtd.py @@ -126,7 +126,9 @@ def main(): logger.info('grid shape: {}'.format(grid.shape)) # #### Create the simulation grid #### - sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8) + pmls = [{'axis': a, 'polarity': p, 'thickness': pml_thickness} + for a in 'xyz' for p in 'np'] + sim = Simulation(grid.grids, do_poynting=True, pmls=pmls) # Source parameters and function w = 2 * numpy.pi * dx / wl diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl index 4bda993..f7304e7 100644 --- a/opencl_fdtd/kernels/update_e.cl +++ b/opencl_fdtd/kernels/update_e.cl @@ -3,8 +3,8 @@ * * Template parameters: * common_header: Rendered contents of common.cl - * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities - * pml_thickness: Number of cells (integer) + * pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing + axes, polarities, and thicknesses. * * OpenCL args: * E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E] @@ -18,9 +18,6 @@ __global ftype *epsx = eps + XX; __global ftype *epsy = eps + YY; __global ftype *epsz = eps + ZZ; -{% if pmls -%} -const int pml_thickness = {{pml_thickness}}; -{%- endif %} /* * Precalclate derivatives @@ -42,7 +39,9 @@ ftype pExi = 0; ftype pEyi = 0; ftype pEzi = 0; -{% for r, p in pmls -%} +{% for pml in pmls -%} + {%- set r = pml['axis'] -%} + {%- set p = pml['polarity'] -%} {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} {%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%} {%- if r != 'y' -%} @@ -51,14 +50,16 @@ ftype pEzi = 0; {%- set se, sh = '+', '-' -%} {%- endif -%} +pml_{{r ~ p}}_thickness = {{pml['thickness']}}; + {%- if p == 'n' %} -if ( {{r}} < pml_thickness ) { +if ( {{r}} < pml_{{r ~ p}_thickness ) { const size_t ir = {{r}}; // index into pml parameters {%- elif p == 'p' %} -if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) { +if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}_thickness ) { const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters {%- endif %} diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl index 8519b51..7c648b6 100644 --- a/opencl_fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -4,22 +4,18 @@ * * Template parameters: * common_header: Rendered contents of common.cl - * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities - * pml_thickness: Number of cells (integer) + * pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing + axes, polarities, and thicknesses. * do_poynting: Whether to precalculate poynting vector components (boolean) * * OpenCL args: - * E, H, dt, [p{01}h{np}, Psi_{xyz}{np}_H], [oS] + * E, H, dt, [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS] */ {{common_header}} //////////////////////////////////////////////////////////////////////////// -{% if pmls -%} -const int pml_thickness = {{pml_thickness}}; -{%- endif %} - /* * Precalculate derivatives */ @@ -57,7 +53,9 @@ ftype pHxi = 0; ftype pHyi = 0; ftype pHzi = 0; -{%- for r, p in pmls -%} +{% for pml in pmls -%} + {%- set r = pml['axis'] -%} + {%- set p = pml['polarity'] -%} {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} {%- set psi = 'Psi_' ~ r ~ p ~ '_H' -%} {%- if r != 'y' -%} @@ -66,14 +64,16 @@ ftype pHzi = 0; {%- set se, sh = '+', '-' -%} {%- endif -%} +pml_{{r ~ p}}_thickness = {{pml['thickness']}}; + {%- if p == 'n' %} -if ( {{r}} < pml_thickness ) { +if ( {{r}} < pml_{{r ~ p}}_thickness ) { const size_t ir = {{r}}; // index into pml parameters {%- elif p == 'p' %} -if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) { +if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) { const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters {%- endif %} diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 7a49f39..78460ec 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -29,7 +29,8 @@ class Simulation(object): After constructing this object, call the (update_E, update_H, update_S) members to perform FDTD updates on the stored (E, H, S) fields: - sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8) + pmls = [{'axis': a, 'polarity': p} for a in 'xyz' for p in 'np'] + sim = Simulation(grid.grids, do_poynting=True, pmls=pmls) with open('sources.c', 'w') as f: f.write('{}'.format(sim.sources)) @@ -73,31 +74,34 @@ class Simulation(object): def __init__(self, epsilon: List[numpy.ndarray], + pmls: List[Dict[str, int or float]], dt: float = .99/numpy.sqrt(3), initial_E: List[numpy.ndarray] = None, initial_H: List[numpy.ndarray] = None, context: pyopencl.Context = None, queue: pyopencl.CommandQueue = None, float_type: numpy.float32 or numpy.float64 = numpy.float32, - pmls: List[List[str]] = None, - pml_thickness: int = 10, do_poynting: bool = True): """ Initialize the simulation. :param epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray spanning the simulation domain. Relative epsilon is used. - :param dt: Time step. Default is the Courant factor. + :param pmls: List of dicts with keys: + 'axis': One of 'x', 'y', 'z'. + 'direction': One of 'n', 'p'. + 'thickness': Number of layers, default 8. + 'epsilon_eff': Effective epsilon to match to. Default 1.0. + 'mu_eff': Effective mu to match to. Default 1.0. + 'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6. + 'm': Polynomial grading exponent. Default 3.5. + 'ma': Exponent for alpha. Default 1. + :param dt: Time step. Default is .99/sqrt(3). :param initial_E: Initial E-field (default is 0 everywhere). Same format as epsilon. :param initial_H: Initial H-field (default is 0 everywhere). Same format as epsilon. :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called. :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called. :param float_type: numpy.float32 or numpy.float64. Default numpy.float32. - :param pmls: List of [axis, direction] pairs which specify simluation boundaries to be - 'coated' with a PML (absorbing layer). Axis should be one of 'x', 'y', 'z', and - direction should be one of 'n', 'p' (i.e., negative, positive). - Default is to apply PMLs to all six boundaries. - :param pml_thickness: Thickness of any PMLs, in number of grid cells. Default 10. :param do_poynting: If true, enables calculation of the poynting vector, S. Poynting vector calculation adds the following computational burdens: * During update_H, ~6 extra additions/cell are performed in order to spatially @@ -154,8 +158,13 @@ class Simulation(object): Exception('Initial_H list elements must have same shape as epsilon elements') self.H = pyopencl.array.to_device(self.queue, vec(H).astype(float_type)) - if pmls is None: - pmls = [[d, p] for d in 'xyz' for p in 'np'] + for pml in pmls: + pml.setdefault('thickness', 8) + pml.setdefault('epsilon_eff', 1.0) + pml.setdefault('mu_eff', 1.0) + pml.setdefault('ln_R_per_layer', -1.6) + pml.setdefault('m', 3.5) + pml.setdefault('ma', 1) ctype = type_to_C(self.arg_type) @@ -176,7 +185,6 @@ class Simulation(object): ) jinja_args = { 'common_header': common_source, - 'pml_thickness': pml_thickness, 'pmls': pmls, 'do_poynting': do_poynting, } @@ -201,35 +209,39 @@ class Simulation(object): ''' PML ''' - m = (3.5, 1) - sigma_max = 0.8 * (m[0] + 1) / numpy.sqrt(1.0) # TODO: epsilon_eff (not 1.0) - alpha_max = 0 # TODO: Decide what to do about non-zero alpha - - def par(x): - sigma = ((x / pml_thickness) ** m[0]) * sigma_max - alpha = ((1 - x / pml_thickness) ** m[1]) * alpha_max - p0 = numpy.exp(-(sigma + alpha) * dt) - p1 = sigma / (sigma + alpha) * (p0 - 1) - return p0, p1 - - xen, xep, xhn, xhp = (numpy.arange(1, pml_thickness + 1, dtype=float_type)[::-1] for _ in range(4)) - xep -= 0.5 - xhn -= 0.5 - - pml_p_names = [['p' + a + eh + np for np in 'np' for a in '01'] for eh in 'eh'] pml_e_fields = OrderedDict() pml_h_fields = OrderedDict() - for ne, nh, pe, ph in zip(*pml_p_names, par(xen) + par(xep), par(xhn) + par(xhp)): - pml_e_fields[ptr(ne)] = pyopencl.array.to_device(self.queue, pe) - pml_h_fields[ptr(nh)] = pyopencl.array.to_device(self.queue, ph) - for pml in pmls: - uv = 'xyz'.replace(pml[0], '') - psi_base = 'Psi_' + ''.join(pml) + '_' + a = 'xyz'.find(pml['axis']) + + sigma_max = -pml['ln_R_per_layer'] / 2 * (pml['m'] + 1) / \ + numpy.sqrt(pml['epsilon_eff'] * pml['mu_eff']) + alpha_max = 0 # TODO: Nonzero alpha + + def par(x): + sigma = ((x / pml['thickness']) ** pml['m']) * sigma_max + alpha = ((1 - x / pml['thickness']) ** pml['ma']) * alpha_max + p0 = numpy.exp(-(sigma + alpha) * dt) + p1 = sigma / (sigma + alpha) * (p0 - 1) + return p0, p1 + + xe, xh = (numpy.arange(1, pml['thickness'] + 1, dtype=float_type)[::-1] for _ in range(2)) + if pml['polarity'] == 'p': + xe -= 0.5 + elif pml['polarity'] == 'n': + xh -= 0.5 + + pml_p_names = [['p' + pml['axis'] + i + eh + pml['polarity'] for i in '01'] for eh in 'eh'] + for name_e, name_h, pe, ph in zip(pml_p_names[0], pml_p_names[1], par(xe), par(xh)): + pml_e_fields[ptr(name_e)] = pyopencl.array.to_device(self.queue, pe) + pml_h_fields[ptr(name_h)] = pyopencl.array.to_device(self.queue, ph) + + uv = 'xyz'.replace(pml['axis'], '') + psi_base = 'Psi_' + pml['axis'] + pml['polarity'] + '_' psi_names = [[psi_base + eh + c for c in uv] for eh in 'EH'] psi_shape = list(epsilon[0].shape) - psi_shape['xyz'.find(pml[0])] = pml_thickness + psi_shape[a] = pml['thickness'] for ne, nh in zip(*psi_names): pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type) From 6bd756c3d3ab8025ae3f317cc9e8e243987bf7a4 Mon Sep 17 00:00:00 2001 From: jan Date: Sun, 1 Oct 2017 12:37:56 -0700 Subject: [PATCH 10/33] add setup.py --- setup.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 setup.py diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..a2f9f9d --- /dev/null +++ b/setup.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python + +from setuptools import setup, find_packages + +setup(name='opencl_fdtd', + version='0.4', + description='OpenCL FDTD solver', + author='Jan Petykiewicz', + author_email='anewusername@gmail.com', + url='https://mpxd.net/gogs/jan/opencl_fdtd', + packages=find_packages(), + package_data={ + 'opencl_fdfd': ['kernels/*'] + }, + install_requires=[ + 'numpy', + 'pyopencl', + 'jinja2', + 'fdfd_tools>=0.3', + ], + extras_require={ + }, + ) + From 9772f47a3482eb7a57872c0219d85753db8822ef Mon Sep 17 00:00:00 2001 From: jan Date: Sun, 1 Oct 2017 14:01:22 -0700 Subject: [PATCH 11/33] fix typo --- opencl_fdtd/kernels/update_e.cl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl index f7304e7..9162b21 100644 --- a/opencl_fdtd/kernels/update_e.cl +++ b/opencl_fdtd/kernels/update_e.cl @@ -54,12 +54,12 @@ pml_{{r ~ p}}_thickness = {{pml['thickness']}}; {%- if p == 'n' %} -if ( {{r}} < pml_{{r ~ p}_thickness ) { +if ( {{r}} < pml_{{r ~ p}}_thickness ) { const size_t ir = {{r}}; // index into pml parameters {%- elif p == 'p' %} -if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}_thickness ) { +if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) { const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters {%- endif %} From 6a03977a699a1cf5035e48f10d9b02496b0978f5 Mon Sep 17 00:00:00 2001 From: jan Date: Sun, 1 Oct 2017 13:11:19 -0700 Subject: [PATCH 12/33] fix pml param names --- opencl_fdtd/kernels/update_e.cl | 4 ++-- opencl_fdtd/kernels/update_h.cl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl index 9162b21..63a0c6f 100644 --- a/opencl_fdtd/kernels/update_e.cl +++ b/opencl_fdtd/kernels/update_e.cl @@ -64,8 +64,8 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) { {%- endif %} const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi - {{psi ~ u}}[ip] = p0e{{p}}[ir] * {{psi ~ u}}[ip] + p1e{{p}}[ir] * dH{{v ~ r}}; - {{psi ~ v}}[ip] = p0e{{p}}[ir] * {{psi ~ v}}[ip] + p1e{{p}}[ir] * dH{{u ~ r}}; + {{psi ~ u}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1e{{p}}[ir] * dH{{v ~ r}}; + {{psi ~ v}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1e{{p}}[ir] * dH{{u ~ r}}; pE{{u}}i {{se}}= {{psi ~ u}}[ip]; pE{{v}}i {{sh}}= {{psi ~ v}}[ip]; } diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl index 7c648b6..cdda3a9 100644 --- a/opencl_fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -78,8 +78,8 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) { {%- endif %} const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi - {{psi ~ u}}[ip] = p0h{{p}}[ir] * {{psi ~ u}}[ip] + p1h{{p}}[ir] * dE{{v ~ r}}; - {{psi ~ v}}[ip] = p0h{{p}}[ir] * {{psi ~ v}}[ip] + p1h{{p}}[ir] * dE{{u ~ r}}; + {{psi ~ u}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1h{{p}}[ir] * dE{{v ~ r}}; + {{psi ~ v}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1h{{p}}[ir] * dE{{u ~ r}}; pH{{u}}i {{sh}}= {{psi ~ u}}[ip]; pH{{v}}i {{se}}= {{psi ~ v}}[ip]; } From a67009d1c889208f1d6ec5fe2d91788d24a898b5 Mon Sep 17 00:00:00 2001 From: jan Date: Sun, 1 Oct 2017 13:14:27 -0700 Subject: [PATCH 13/33] fix declaration --- opencl_fdtd/kernels/update_e.cl | 2 +- opencl_fdtd/kernels/update_h.cl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl index 63a0c6f..eed4ee5 100644 --- a/opencl_fdtd/kernels/update_e.cl +++ b/opencl_fdtd/kernels/update_e.cl @@ -50,7 +50,7 @@ ftype pEzi = 0; {%- set se, sh = '+', '-' -%} {%- endif -%} -pml_{{r ~ p}}_thickness = {{pml['thickness']}}; +int pml_{{r ~ p}}_thickness = {{pml['thickness']}}; {%- if p == 'n' %} diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl index cdda3a9..27d7a47 100644 --- a/opencl_fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -64,7 +64,7 @@ ftype pHzi = 0; {%- set se, sh = '+', '-' -%} {%- endif -%} -pml_{{r ~ p}}_thickness = {{pml['thickness']}}; +int pml_{{r ~ p}}_thickness = {{pml['thickness']}}; {%- if p == 'n' %} From aab57412a5b1ab39a4f2426c697612f20f1f3221 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Mon, 15 Jan 2018 22:36:40 -0800 Subject: [PATCH 14/33] move code to new location --- README.md | 8 ++++---- requirements.txt | 8 ++++---- setup.py | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 26d5acd..731450b 100644 --- a/README.md +++ b/README.md @@ -27,16 +27,16 @@ electromagnetic simulations on parallel compute hardware (mainly GPUs). * numpy * pyopencl * jinja2 -* [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools) +* [fdfd_tools](https://mpxd.net/code/jan/fdfd_tools) Optional (used for examples): * dill (for file output) -* [gridlock](https://mpxd.net/gogs/jan/gridlock) -* [masque](https://mpxd.net/gogs/jan/masque) +* [gridlock](https://mpxd.net/code/jan/gridlock) +* [masque](https://mpxd.net/code/jan/masque) To get the code, just clone this repository: ```bash -git clone https://mpxd.net/gogs/jan/opencl_fdtd.git +git clone https://mpxd.net/code/jan/opencl_fdtd.git ``` You can install the requirements and their dependencies easily with diff --git a/requirements.txt b/requirements.txt index 6ed651b..6626171 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ numpy h5py pyopencl jinja2 --e git+https://mpxd.net/gogs/jan/float_raster.git@release#egg=float_raster --e git+https://mpxd.net/gogs/jan/gridlock.git@release#egg=gridlock --e git+https://mpxd.net/gogs/jan/masque.git@release#egg=masque --e git+https://mpxd.net/gogs/jan/fdfd_tools.git@master#egg=fdfd_tools +-e git+https://mpxd.net/code/jan/float_raster.git@release#egg=float_raster +-e git+https://mpxd.net/code/jan/gridlock.git@release#egg=gridlock +-e git+https://mpxd.net/code/jan/masque.git@release#egg=masque +-e git+https://mpxd.net/code/jan/fdfd_tools.git@master#egg=fdfd_tools diff --git a/setup.py b/setup.py index a2f9f9d..55f5af9 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup(name='opencl_fdtd', description='OpenCL FDTD solver', author='Jan Petykiewicz', author_email='anewusername@gmail.com', - url='https://mpxd.net/gogs/jan/opencl_fdtd', + url='https://mpxd.net/code/jan/opencl_fdtd', packages=find_packages(), package_data={ 'opencl_fdfd': ['kernels/*'] From c8298d916fbec02925030d977288ab411d6d49c9 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Sun, 16 Sep 2018 20:12:26 -0700 Subject: [PATCH 15/33] Use python3 for setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 55f5af9..c7e55f3 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 from setuptools import setup, find_packages From d0b303523e735193c021a7b7de64841dbcc47bc3 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Sun, 16 Sep 2018 20:12:41 -0700 Subject: [PATCH 16/33] Move version string into module --- opencl_fdtd/__init__.py | 4 ++++ setup.py | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/opencl_fdtd/__init__.py b/opencl_fdtd/__init__.py index e3e95b4..b621c19 100644 --- a/opencl_fdtd/__init__.py +++ b/opencl_fdtd/__init__.py @@ -1 +1,5 @@ from .simulation import Simulation, type_to_C + +__author__ = 'Jan Petykiewicz' + +version = '0.4' diff --git a/setup.py b/setup.py index c7e55f3..8b098db 100644 --- a/setup.py +++ b/setup.py @@ -1,9 +1,10 @@ #!/usr/bin/env python3 from setuptools import setup, find_packages +import opencl_fdtd setup(name='opencl_fdtd', - version='0.4', + version=opencl_fdtd.version, description='OpenCL FDTD solver', author='Jan Petykiewicz', author_email='anewusername@gmail.com', From 1de6fb0e39f39ad2b047116668ab44de0a5429e3 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Sun, 16 Sep 2018 20:12:52 -0700 Subject: [PATCH 17/33] Use readme as long_description --- setup.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/setup.py b/setup.py index 8b098db..b7fd3cf 100644 --- a/setup.py +++ b/setup.py @@ -3,9 +3,14 @@ from setuptools import setup, find_packages import opencl_fdtd +with open('README.md', 'r') as f: + long_description = f.read() + setup(name='opencl_fdtd', version=opencl_fdtd.version, description='OpenCL FDTD solver', + long_description=long_description, + long_description_content_type='text/markdown', author='Jan Petykiewicz', author_email='anewusername@gmail.com', url='https://mpxd.net/code/jan/opencl_fdtd', From ea5e298023e74eb3c1865658cfdaedb1f06d9c58 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Thu, 29 Nov 2018 01:35:20 -0800 Subject: [PATCH 18/33] Explicitly cast to int --- opencl_fdtd/kernels/common.cl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/opencl_fdtd/kernels/common.cl b/opencl_fdtd/kernels/common.cl index d203ca7..deecec9 100644 --- a/opencl_fdtd/kernels/common.cl +++ b/opencl_fdtd/kernels/common.cl @@ -73,9 +73,9 @@ __global ftype *Hz = H + ZZ; * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix . */ {% for r in 'xyz' %} -int m{{r}} = -di{{r}}; -int p{{r}} = +di{{r}}; -int wrap_{{r}} = (s{{r}} - 1) * di{{r}}; +int m{{r}} = - (int) di{{r}}; +int p{{r}} = + (int) di{{r}}; +int wrap_{{r}} = (s{{r}} - 1) * (int) di{{r}}; if ( {{r}} == 0 ) { m{{r}} = wrap_{{r}}; } else if ( {{r}} == s{{r}} - 1 ) { From 1e874cb0c029cae2beeb4a0fae5d60de53106a83 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Thu, 29 Nov 2018 01:36:13 -0800 Subject: [PATCH 19/33] Fix sign on H component of PML --- opencl_fdtd/kernels/update_h.cl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl index 27d7a47..968c2b6 100644 --- a/opencl_fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -96,9 +96,9 @@ ftype Hz_old = Hz[i]; {%- endif %} // H update equations -Hx[i] -= dt * (dEzy - dEyz + pHxi); -Hy[i] -= dt * (dExz - dEzx + pHyi); -Hz[i] -= dt * (dEyx - dExy + pHzi); +Hx[i] -= dt * (dEzy - dEyz - pHxi); +Hy[i] -= dt * (dExz - dEzx - pHyi); +Hz[i] -= dt * (dEyx - dExy - pHzi); {% if do_poynting -%} // Average H across timesteps From 2b1d906665c6394561b613ba62af832bbfb72ea3 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Thu, 29 Nov 2018 01:38:28 -0800 Subject: [PATCH 20/33] Add _create_field() and _create_eps() --- opencl_fdtd/simulation.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 78460ec..525d818 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -273,7 +273,24 @@ class Simulation(object): arguments=', '.join(S_args.keys())) self.update_S = lambda e: S_update(*S_args.values(), wait_for=e) + def _create_eps(self, epsilon: List[numpy.ndarray]): + if len(epsilon) != 3: + raise Exception('Epsilon must be a list with length of 3') + if not all((e.shape == epsilon[0].shape for e in epsilon[1:])): + raise Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon]) + if not epsilon[0].shape == self.shape: + raise Exception('Epsilon shape mismatch. Expected {}, got {}'.format(self.shape, epsilon[0].shape)) + self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(self.arg_type)) + def _create_field(self, initial_value: List[numpy.ndarray] = None): + if initial_value is None: + return pyopencl.array.zeros_like(self.eps) + else: + if len(initial_value) != 3: + Exception('Initial field value must be a list of length 3') + if not all((f.shape == self.shape for f in initial_value)): + Exception('Initial field list elements must have same shape as epsilon elements') + return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type)) def type_to_C(float_type) -> str: """ From f00c8b4a3eb921fc8ed118f8b7f240178a5f3c47 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Thu, 29 Nov 2018 01:59:05 -0800 Subject: [PATCH 21/33] Add _create_context(), _create_operation(), and _create_pmls(), and generalize initial field value args --- opencl_fdtd/simulation.py | 116 ++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 66 deletions(-) diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 525d818..7b94c64 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -76,8 +76,7 @@ class Simulation(object): epsilon: List[numpy.ndarray], pmls: List[Dict[str, int or float]], dt: float = .99/numpy.sqrt(3), - initial_E: List[numpy.ndarray] = None, - initial_H: List[numpy.ndarray] = None, + initial_fields: Dict[str, List[numpy.ndarray]] = None, context: pyopencl.Context = None, queue: pyopencl.CommandQueue = None, float_type: numpy.float32 or numpy.float64 = numpy.float32, @@ -113,21 +112,14 @@ class Simulation(object): * GPU memory requirements are approximately doubled, since S and the intermediate products must be stored. """ + if initial_fields is None: + initial_fields = {} - if len(epsilon) != 3: - Exception('Epsilon must be a list with length of 3') - if not all((e.shape == epsilon[0].shape for e in epsilon[1:])): - Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon]) - - if context is None: - self.context = pyopencl.create_some_context() - else: - self.context = context - - if queue is None: - self.queue = pyopencl.CommandQueue(self.context) - else: - self.queue = queue + self.shape = epsilon[0].shape + self.arg_type = float_type + self.sources = {} + self._create_context(context, queue) + self._create_eps(epsilon) if dt > .99/numpy.sqrt(3): warnings.warn('Warning: unstable dt: {}'.format(dt)) @@ -136,27 +128,8 @@ class Simulation(object): else: self.dt = dt - self.arg_type = float_type - self.sources = {} - self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(float_type)) - - if initial_E is None: - self.E = pyopencl.array.zeros_like(self.eps) - else: - if len(initial_E) != 3: - Exception('Initial_E must be a list of length 3') - if not all((E.shape == epsilon[0].shape for E in initial_E)): - Exception('Initial_E list elements must have same shape as epsilon elements') - self.E = pyopencl.array.to_device(self.queue, vec(E).astype(float_type)) - - if initial_H is None: - self.H = pyopencl.array.zeros_like(self.eps) - else: - if len(initial_H) != 3: - Exception('Initial_H must be a list of length 3') - if not all((H.shape == epsilon[0].shape for H in initial_H)): - Exception('Initial_H list elements must have same shape as epsilon elements') - self.H = pyopencl.array.to_device(self.queue, vec(H).astype(float_type)) + self.E = self._create_field(initial_fields.get('E', None)) + self.H = self._create_field(initial_fields.get('H', None)) for pml in pmls: pml.setdefault('thickness', 8) @@ -181,7 +154,7 @@ class Simulation(object): common_source = jinja_env.get_template('common.cl').render( ftype=ctype, - shape=epsilon[0].shape, + shape=self.shape, ) jinja_args = { 'common_header': common_source, @@ -194,21 +167,37 @@ class Simulation(object): self.sources['E'] = E_source self.sources['H'] = H_source + + + S_fields = OrderedDict() if do_poynting: S_source = jinja_env.get_template('update_s.cl').render(**jinja_args) self.sources['S'] = S_source - self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=float_type) + self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=self.arg_type) self.S = pyopencl.array.zeros_like(self.E) - S_fields = OrderedDict() S_fields[ptr('oS')] = self.oS S_fields[ptr('S')] = self.S - else: - S_fields = OrderedDict() ''' PML ''' + pml_e_fields, pml_h_fields = self._create_pmls(pmls) + + ''' + Create operations + ''' + self.update_E = self._create_operation(E_source, (base_fields, eps_field, pml_e_fields)) + self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields)) + if do_poynting: + self.update_S = self._create_operation(S_source, (base_fields, S_fields)) + + + def _create_pmls(self, pmls): + ctype = type_to_C(self.arg_type) + def ptr(arg: str) -> str: + return ctype + ' *' + arg + pml_e_fields = OrderedDict() pml_h_fields = OrderedDict() for pml in pmls: @@ -225,7 +214,7 @@ class Simulation(object): p1 = sigma / (sigma + alpha) * (p0 - 1) return p0, p1 - xe, xh = (numpy.arange(1, pml['thickness'] + 1, dtype=float_type)[::-1] for _ in range(2)) + xe, xh = (numpy.arange(1, pml['thickness'] + 1, dtype=self.arg_type)[::-1] for _ in range(2)) if pml['polarity'] == 'p': xe -= 0.5 elif pml['polarity'] == 'n': @@ -240,39 +229,34 @@ class Simulation(object): psi_base = 'Psi_' + pml['axis'] + pml['polarity'] + '_' psi_names = [[psi_base + eh + c for c in uv] for eh in 'EH'] - psi_shape = list(epsilon[0].shape) + psi_shape = list(self.shape) psi_shape[a] = pml['thickness'] for ne, nh in zip(*psi_names): pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type) pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type) + return pml_e_fields, pml_h_fields - self.pml_e_fields = pml_e_fields - self.pml_h_fields = pml_h_fields + def _create_operation(self, source, args_fields): + args = OrderedDict() + [args.update(d) for d in args_fields] + update = ElementwiseKernel(self.context, operation=source, + arguments=', '.join(args.keys())) + return lambda e: update(*args.values(), wait_for=e) - ''' - Create operations - ''' - E_args = OrderedDict() - [E_args.update(d) for d in (base_fields, eps_field, pml_e_fields)] - E_update = ElementwiseKernel(self.context, operation=E_source, - arguments=', '.join(E_args.keys())) + def _create_context(self, context: pyopencl.Context = None, + queue: pyopencl.CommandQueue = None): + if context is None: + self.context = pyopencl.create_some_context() + else: + self.context = context - H_args = OrderedDict() - [H_args.update(d) for d in (base_fields, pml_h_fields, S_fields)] - H_update = ElementwiseKernel(self.context, operation=H_source, - arguments=', '.join(H_args.keys())) - self.update_E = lambda e: E_update(*E_args.values(), wait_for=e) - self.update_H = lambda e: H_update(*H_args.values(), wait_for=e) + if queue is None: + self.queue = pyopencl.CommandQueue(self.context) + else: + self.queue = queue - if do_poynting: - S_args = OrderedDict() - [S_args.update(d) for d in (base_fields, S_fields)] - S_update = ElementwiseKernel(self.context, operation=S_source, - arguments=', '.join(S_args.keys())) - - self.update_S = lambda e: S_update(*S_args.values(), wait_for=e) def _create_eps(self, epsilon: List[numpy.ndarray]): if len(epsilon) != 3: raise Exception('Epsilon must be a list with length of 3') From cb471df1827ac726e3a36d72a9868177897b90ba Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Thu, 29 Nov 2018 02:00:30 -0800 Subject: [PATCH 22/33] Implement proper kappa for PML --- opencl_fdtd/kernels/update_e.cl | 2 ++ opencl_fdtd/kernels/update_h.cl | 4 ++++ opencl_fdtd/simulation.py | 19 +++++++++++-------- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl index eed4ee5..5a9ed1a 100644 --- a/opencl_fdtd/kernels/update_e.cl +++ b/opencl_fdtd/kernels/update_e.cl @@ -64,6 +64,8 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) { {%- endif %} const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi + dH{{v ~ r}} *= p{{r}}2e{{p}}[ir]; + dH{{u ~ r}} *= p{{r}}2e{{p}}[ir]; {{psi ~ u}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1e{{p}}[ir] * dH{{v ~ r}}; {{psi ~ v}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1e{{p}}[ir] * dH{{u ~ r}}; pE{{u}}i {{se}}= {{psi ~ u}}[ip]; diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl index 968c2b6..56d9bbc 100644 --- a/opencl_fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -45,6 +45,8 @@ ftype aEzy = Ez[i + py] + Ez[i]; {%- endif %} + + /* * PML Update */ @@ -78,6 +80,8 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) { {%- endif %} const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi + dE{{v ~ r}} *= p{{r}}2h{{p}}[ir]; + dE{{u ~ r}} *= p{{r}}2h{{p}}[ir]; {{psi ~ u}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1h{{p}}[ir] * dE{{v ~ r}}; {{psi ~ v}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1h{{p}}[ir] * dE{{u ~ r}}; pH{{u}}i {{sh}}= {{psi ~ u}}[ip]; diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 7b94c64..1b6d9a2 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -203,16 +203,19 @@ class Simulation(object): for pml in pmls: a = 'xyz'.find(pml['axis']) - sigma_max = -pml['ln_R_per_layer'] / 2 * (pml['m'] + 1) / \ - numpy.sqrt(pml['epsilon_eff'] * pml['mu_eff']) - alpha_max = 0 # TODO: Nonzero alpha + sigma_max = -pml['ln_R_per_layer'] / 2 * (pml['m'] + 1) + kappa_max = numpy.sqrt(pml['mu_eff'] * pml['epsilon_eff']) + alpha_max = 0 # TODO: Nonzero alpha? def par(x): - sigma = ((x / pml['thickness']) ** pml['m']) * sigma_max + scaling = ((x / (pml['thickness'])) ** pml['m']) + sigma = scaling * sigma_max + kappa = 1 + scaling * (kappa_max - 1) alpha = ((1 - x / pml['thickness']) ** pml['ma']) * alpha_max - p0 = numpy.exp(-(sigma + alpha) * dt) - p1 = sigma / (sigma + alpha) * (p0 - 1) - return p0, p1 + p0 = numpy.exp(-(sigma / kappa + alpha) * self.dt) + p1 = sigma / (sigma + kappa * alpha) * (p0 - 1) + p2 = 1/kappa + return p0, p1, p2 xe, xh = (numpy.arange(1, pml['thickness'] + 1, dtype=self.arg_type)[::-1] for _ in range(2)) if pml['polarity'] == 'p': @@ -220,7 +223,7 @@ class Simulation(object): elif pml['polarity'] == 'n': xh -= 0.5 - pml_p_names = [['p' + pml['axis'] + i + eh + pml['polarity'] for i in '01'] for eh in 'eh'] + pml_p_names = [['p' + pml['axis'] + i + eh + pml['polarity'] for i in '012'] for eh in 'eh'] for name_e, name_h, pe, ph in zip(pml_p_names[0], pml_p_names[1], par(xe), par(xh)): pml_e_fields[ptr(name_e)] = pyopencl.array.to_device(self.queue, pe) pml_h_fields[ptr(name_h)] = pyopencl.array.to_device(self.queue, ph) From d8dc024626f872d2a9310745c96d02413b1e1982 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Thu, 29 Nov 2018 02:01:28 -0800 Subject: [PATCH 23/33] Implementation of "source" fields (J) --- opencl_fdtd/kernels/update_j.cl | 32 ++++++++++++++++++++++++++++++++ opencl_fdtd/simulation.py | 24 +++++++++++++++++++++++- 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 opencl_fdtd/kernels/update_j.cl diff --git a/opencl_fdtd/kernels/update_j.cl b/opencl_fdtd/kernels/update_j.cl new file mode 100644 index 0000000..4d7f64d --- /dev/null +++ b/opencl_fdtd/kernels/update_j.cl @@ -0,0 +1,32 @@ +/* + * Update E-field from J field + * + * Template parameters: + * common_header: Rendered contents of common.cl + * + * OpenCL args: + * E, Jr, Ji, c, s, xmin, xmax, ymin, ymax, zmin, zmax + */ + +{{common_header}} + +//////////////////////////////////////////////////////////////////////////// + +__global ftype *Jrx = Jr + XX; +__global ftype *Jry = Jr + YY; +__global ftype *Jrz = Jr + ZZ; +__global ftype *Jix = Ji + XX; +__global ftype *Jiy = Ji + YY; +__global ftype *Jiz = Ji + ZZ; + + +if (x < xmin || y < ymin || z < zmin) { + PYOPENCL_ELWISE_CONTINUE; +} +if (x >= xmax || y >= ymax || z >= zmax) { + PYOPENCL_ELWISE_CONTINUE; +} + +Ex[i] += c * Jrx[i] + s * Jix[i]; +Ey[i] += c * Jry[i] + s * Jiy[i]; +Ez[i] += c * Jrz[i] + s * Jiz[i]; diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 1b6d9a2..6e12b2e 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -70,6 +70,7 @@ class Simulation(object): update_E = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event] update_H = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event] update_S = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event] + update_J = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event] sources = None # type: Dict[str, str] def __init__(self, @@ -80,7 +81,8 @@ class Simulation(object): context: pyopencl.Context = None, queue: pyopencl.CommandQueue = None, float_type: numpy.float32 or numpy.float64 = numpy.float32, - do_poynting: bool = True): + do_poynting: bool = True, + do_fieldsrc: bool = False): """ Initialize the simulation. @@ -179,6 +181,17 @@ class Simulation(object): S_fields[ptr('oS')] = self.oS S_fields[ptr('S')] = self.S + J_fields = OrderedDict() + if do_fieldsrc: + J_source = jinja_env.get_template('update_j.cl').render(**jinja_args) + self.sources['J'] = J_source + + self.Ji = pyopencl.array.zeros_like(self.E) + self.Jr = pyopencl.array.zeros_like(self.E) + J_fields[ptr('Jr')] = self.Jr + J_fields[ptr('Ji')] = self.Ji + + ''' PML ''' @@ -191,6 +204,15 @@ class Simulation(object): self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields)) if do_poynting: self.update_S = self._create_operation(S_source, (base_fields, S_fields)) + if do_fieldsrc: + args = OrderedDict() + [args.update(d) for d in (base_fields, J_fields)] + var_args = [ctype + ' ' + v for v in 'cs'] + ['uint ' + r + m for r in 'xyz' for m in ('min', 'max')] + print(var_args) + update = ElementwiseKernel(self.context, operation=J_source, + arguments=', '.join(list(args.keys()) + var_args)) + #print(len(args.values()),'\n\n', args.values(), args.keys()) + self.update_J = lambda e, *a: update(*args.values(), *a, wait_for=e) def _create_pmls(self, pmls): From 3ac20f62718f52d3ea710fb4fb24d135eb80faf7 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Thu, 29 Nov 2018 02:01:48 -0800 Subject: [PATCH 24/33] Add bloch boundaries (untested) --- opencl_fdtd/kernels/update_e.cl | 12 ++++++++++++ opencl_fdtd/kernels/update_h.cl | 15 +++++++++++++++ opencl_fdtd/simulation.py | 33 ++++++++++++++++++++++++++++++++- 3 files changed, 59 insertions(+), 1 deletion(-) diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl index 5a9ed1a..c578414 100644 --- a/opencl_fdtd/kernels/update_e.cl +++ b/opencl_fdtd/kernels/update_e.cl @@ -31,6 +31,18 @@ ftype dHyz = Hy[i] - Hy[i + mz]; ftype dHzx = Hz[i] - Hz[i + mx]; ftype dHzy = Hz[i] - Hz[i + my]; +{% for bloch in bloch_boundaries -%} + {%- set r = bloch['axis'] -%} + {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} +if ({{r}} == 0) { + ftype bloch_im = {{bloch['real']}}; + ftype bloch_re = {{bloch['imag']}}; + dH{{u ~ r}} = bloch_re * dH{{v ~ r}} + bloch_im * (G{{u}}[i] - G{{u}}[i + m{{u}}]); + dH{{v ~ r}} = bloch_re * dH{{v ~ r}} + bloch_im * (G{{v}}[i] - G{{v}}[i + m{{v}}]); +} +{%- endfor %} + + /* * PML Update */ diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl index 56d9bbc..0ef15dd 100644 --- a/opencl_fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -28,6 +28,21 @@ ftype dEyz = Ey[i + pz] - Ey[i]; ftype dEzx = Ez[i + px] - Ez[i]; ftype dEzy = Ez[i + py] - Ez[i]; + +{% for bloch in bloch_boundaries -%} + {%- set r = bloch['axis'] -%} + {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} +if ({{r}} == s{{r}} - 1) { + ftype bloch_re = {{bloch['real']}}; + ftype bloch_im = {{bloch['imag']}}; + dE{{u ~ r}} = bloch_re * dE{{u ~ r}} + bloch_im * (F{{u}}[i + p{{u}}] - F{{u}}[i]); + dE{{v ~ r}} = bloch_re * dE{{v ~ r}} + bloch_im * (F{{v}}[i + p{{v}}] - F{{v}}[i]); +} +{%- endfor %} + + + + {%- if do_poynting %} diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 6e12b2e..9b23cad 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -76,6 +76,7 @@ class Simulation(object): def __init__(self, epsilon: List[numpy.ndarray], pmls: List[Dict[str, int or float]], + bloch_boundaries: List[Dict[str, int or float]] = (), dt: float = .99/numpy.sqrt(3), initial_fields: Dict[str, List[numpy.ndarray]] = None, context: pyopencl.Context = None, @@ -132,6 +133,9 @@ class Simulation(object): self.E = self._create_field(initial_fields.get('E', None)) self.H = self._create_field(initial_fields.get('H', None)) + if bloch_boundaries: + self.F = self._create_field(initial_fields.get('F', None)) + self.G = self._create_field(initial_fields.get('G', None)) for pml in pmls: pml.setdefault('thickness', 8) @@ -154,6 +158,17 @@ class Simulation(object): eps_field = OrderedDict() eps_field[ptr('eps')] = self.eps + if bloch_boundaries: + base_fields[ptr('F')] = self.F + base_fields[ptr('G')] = self.G + + bloch_fields = OrderedDict() + bloch_fields[ptr('E')] = self.F + bloch_fields[ptr('H')] = self.G + bloch_fields[ctype + ' dt'] = self.dt + bloch_fields[ptr('F')] = self.E + bloch_fields[ptr('G')] = self.H + common_source = jinja_env.get_template('common.cl').render( ftype=ctype, shape=self.shape, @@ -162,13 +177,24 @@ class Simulation(object): 'common_header': common_source, 'pmls': pmls, 'do_poynting': do_poynting, + 'bloch': bloch_boundaries, } E_source = jinja_env.get_template('update_e.cl').render(**jinja_args) H_source = jinja_env.get_template('update_h.cl').render(**jinja_args) - self.sources['E'] = E_source self.sources['H'] = H_source + if bloch_boundaries: + bloch_args = jinja_args.copy() + bloch_args['do_poynting'] = False + bloch_args['bloch'] = [{'axis': b['axis'], + 'real': b['imag'], + 'imag': b['real']} + for b in bloch_boundaries] + F_source = jinja_env.get_template('update_e.cl').render(**bloch_args) + G_source = jinja_env.get_template('update_h.cl').render(**bloch_args) + self.sources['F'] = F_source + self.sources['G'] = G_source S_fields = OrderedDict() @@ -196,6 +222,8 @@ class Simulation(object): PML ''' pml_e_fields, pml_h_fields = self._create_pmls(pmls) + if bloch_boundaries: + pml_f_fields, pml_g_fields = self._create_pmls(pmls) ''' Create operations @@ -204,6 +232,9 @@ class Simulation(object): self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields)) if do_poynting: self.update_S = self._create_operation(S_source, (base_fields, S_fields)) + if bloch_boundaries: + self.update_F = self._create_operation(F_source, (bloch_fields, eps_field, pml_f_fields)) + self.update_G = self._create_operation(G_source, (bloch_fields, pml_g_fields)) if do_fieldsrc: args = OrderedDict() [args.update(d) for d in (base_fields, J_fields)] From 5ffe547640c78af55a7a8e802f8a503cee21e950 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Fri, 30 Nov 2018 00:13:27 -0800 Subject: [PATCH 25/33] Typo fixes and minor comment updates --- opencl_fdtd/kernels/update_e.cl | 4 ++-- opencl_fdtd/kernels/update_h.cl | 2 +- opencl_fdtd/simulation.py | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl index c578414..f44527d 100644 --- a/opencl_fdtd/kernels/update_e.cl +++ b/opencl_fdtd/kernels/update_e.cl @@ -4,7 +4,7 @@ * Template parameters: * common_header: Rendered contents of common.cl * pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing - axes, polarities, and thicknesses. + * axes, polarities, and thicknesses. * * OpenCL args: * E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E] @@ -20,7 +20,7 @@ __global ftype *epsz = eps + ZZ; /* - * Precalclate derivatives + * Precalculate derivatives */ ftype dHxy = Hx[i] - Hx[i + my]; ftype dHxz = Hx[i] - Hx[i + mz]; diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl index 0ef15dd..f1c19cb 100644 --- a/opencl_fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -5,7 +5,7 @@ * Template parameters: * common_header: Rendered contents of common.cl * pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing - axes, polarities, and thicknesses. + * axes, polarities, and thicknesses. * do_poynting: Whether to precalculate poynting vector components (boolean) * * OpenCL args: diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 9b23cad..0eaef5d 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -56,11 +56,11 @@ class Simulation(object): event0 and event1 to occur (i.e. previous operations to finish) before starting execution. event2 can then be used to prepare further operations to be run after update_H. """ - E = None # type: List[pyopencl.array.Array] - H = None # type: List[pyopencl.array.Array] - S = None # type: List[pyopencl.array.Array] - eps = None # type: List[pyopencl.array.Array] - dt = None # type: float + E = None # type: pyopencl.array.Array + H = None # type: pyopencl.array.Array + S = None # type: pyopencl.array.Array + eps = None # type: pyopencl.array.Array + dt = None # type: float arg_type = None # type: numpy.float32 or numpy.float64 From f5a5a0ad04176e8ebbca15c8e4a53300e5b2f5e6 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Fri, 30 Nov 2018 00:36:11 -0800 Subject: [PATCH 26/33] Enable nonuniform grids (minimally tested) --- opencl_fdtd/kernels/update_e.cl | 28 +++++++++++++++++++++------- opencl_fdtd/kernels/update_h.cl | 28 +++++++++++++++++++++------- opencl_fdtd/simulation.py | 28 +++++++++++++++++++++++++--- 3 files changed, 67 insertions(+), 17 deletions(-) diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl index f44527d..9127181 100644 --- a/opencl_fdtd/kernels/update_e.cl +++ b/opencl_fdtd/kernels/update_e.cl @@ -5,9 +5,12 @@ * common_header: Rendered contents of common.cl * pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing * axes, polarities, and thicknesses. + * uniform_dx: If grid is uniform, uniform_dx should be the grid spacing. + * Otherwise, uniform_dx should be False and [inv_dh{xyz}] arrays must be supplied as + * OpenCL args. * * OpenCL args: - * E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E] + * E, H, dt, eps, [p{012}e{np}, Psi_{xyz}{np}_E], [inv_dh{xyz}] */ {{common_header}} @@ -19,17 +22,28 @@ __global ftype *epsy = eps + YY; __global ftype *epsz = eps + ZZ; +{%- if uniform_dx %} +ftype inv_dx = 1.0 / {{uniform_dx}}; +ftype inv_dy = 1.0 / {{uniform_dx}}; +ftype inv_dz = 1.0 / {{uniform_dx}}; +{%- else %} +ftype inv_dx = inv_dhx[x]; +ftype inv_dy = inv_dhy[y]; +ftype inv_dz = inv_dhz[z]; +{%- endif %} + + /* * Precalculate derivatives */ -ftype dHxy = Hx[i] - Hx[i + my]; -ftype dHxz = Hx[i] - Hx[i + mz]; +ftype dHxy = (Hx[i] - Hx[i + my]) * inv_dy; +ftype dHxz = (Hx[i] - Hx[i + mz]) * inv_dz; -ftype dHyx = Hy[i] - Hy[i + mx]; -ftype dHyz = Hy[i] - Hy[i + mz]; +ftype dHyx = (Hy[i] - Hy[i + mx]) * inv_dx; +ftype dHyz = (Hy[i] - Hy[i + mz]) * inv_dz; -ftype dHzx = Hz[i] - Hz[i + mx]; -ftype dHzy = Hz[i] - Hz[i + my]; +ftype dHzx = (Hz[i] - Hz[i + mx]) * inv_dx; +ftype dHzy = (Hz[i] - Hz[i + my]) * inv_dy; {% for bloch in bloch_boundaries -%} {%- set r = bloch['axis'] -%} diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl index f1c19cb..ac75157 100644 --- a/opencl_fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -7,9 +7,12 @@ * pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing * axes, polarities, and thicknesses. * do_poynting: Whether to precalculate poynting vector components (boolean) + * uniform_dx: If grid is uniform, uniform_dx should be the grid spacing. + * Otherwise, uniform_dx should be False and [inv_de{xyz}] arrays must be supplied as + * OpenCL args. * * OpenCL args: - * E, H, dt, [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS] + * E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS] */ {{common_header}} @@ -19,14 +22,25 @@ /* * Precalculate derivatives */ -ftype dExy = Ex[i + py] - Ex[i]; -ftype dExz = Ex[i + pz] - Ex[i]; +{%- if uniform_dx %} +ftype inv_dx = 1.0 / {{uniform_dx}}; +ftype inv_dy = 1.0 / {{uniform_dx}}; +ftype inv_dz = 1.0 / {{uniform_dx}}; +{%- else %} +ftype inv_dx = inv_dex[x]; +ftype inv_dy = inv_dey[y]; +ftype inv_dz = inv_dez[z]; +{%- endif %} -ftype dEyx = Ey[i + px] - Ey[i]; -ftype dEyz = Ey[i + pz] - Ey[i]; -ftype dEzx = Ez[i + px] - Ez[i]; -ftype dEzy = Ez[i + py] - Ez[i]; +ftype dExy = (Ex[i + py] - Ex[i]) * inv_dy; +ftype dExz = (Ex[i + pz] - Ex[i]) * inv_dz; + +ftype dEyx = (Ey[i + px] - Ey[i]) * inv_dx; +ftype dEyz = (Ey[i + pz] - Ey[i]) * inv_dz; + +ftype dEzx = (Ez[i + px] - Ez[i]) * inv_dx; +ftype dEzy = (Ez[i + py] - Ez[i]) * inv_dy; {% for bloch in bloch_boundaries -%} diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 0eaef5d..940725a 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -61,6 +61,7 @@ class Simulation(object): S = None # type: pyopencl.array.Array eps = None # type: pyopencl.array.Array dt = None # type: float + inv_dxes = None # type: List[pyopencl.array.Array] arg_type = None # type: numpy.float32 or numpy.float64 @@ -77,7 +78,8 @@ class Simulation(object): epsilon: List[numpy.ndarray], pmls: List[Dict[str, int or float]], bloch_boundaries: List[Dict[str, int or float]] = (), - dt: float = .99/numpy.sqrt(3), + dxes: List[List[numpy.ndarray]] or float = None, + dt: float = None, initial_fields: Dict[str, List[numpy.ndarray]] = None, context: pyopencl.Context = None, queue: pyopencl.CommandQueue = None, @@ -98,7 +100,7 @@ class Simulation(object): 'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6. 'm': Polynomial grading exponent. Default 3.5. 'ma': Exponent for alpha. Default 1. - :param dt: Time step. Default is .99/sqrt(3). + :param dt: Time step. Default is min(dxes) * .99/sqrt(3). :param initial_E: Initial E-field (default is 0 everywhere). Same format as epsilon. :param initial_H: Initial H-field (default is 0 everywhere). Same format as epsilon. :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called. @@ -124,7 +126,22 @@ class Simulation(object): self._create_context(context, queue) self._create_eps(epsilon) - if dt > .99/numpy.sqrt(3): + if dxes is None: + dxes = 1.0 + + if isinstance(dxes, (float, int)): + uniform_dx = dxes + min_dx = dxes + else: + uniform_dx = False + self.inv_dxes = [self._create_field(1 / dxn) for dxn in dxes[0] + dxes[1]] + min_dx = min(min(dxn) for dxn in dxes[0] + dxes[1]) + + max_dt = min_dx * .99 / numpy.sqrt(3) + + if dt is None: + self.dt = max_dt + elif dt > max_dt: warnings.warn('Warning: unstable dt: {}'.format(dt)) elif dt <= 0: raise Exception('Invalid dt: {}'.format(dt)) @@ -154,6 +171,10 @@ class Simulation(object): base_fields[ptr('E')] = self.E base_fields[ptr('H')] = self.H base_fields[ctype + ' dt'] = self.dt + if uniform_dx == False: + inv_dx_names = ['inv_d' + eh + r for eh in 'eh' for r in 'xyz'] + for name, field in zip(inv_dx_names, self.inv_dxes): + base_fields[ptr(name)] = field eps_field = OrderedDict() eps_field[ptr('eps')] = self.eps @@ -178,6 +199,7 @@ class Simulation(object): 'pmls': pmls, 'do_poynting': do_poynting, 'bloch': bloch_boundaries, + 'uniform_dx': uniform_dx, } E_source = jinja_env.get_template('update_e.cl').render(**jinja_args) H_source = jinja_env.get_template('update_h.cl').render(**jinja_args) From 65cc1183921b6107b8487fc762a1606bb6512d8a Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Fri, 30 Nov 2018 00:36:22 -0800 Subject: [PATCH 27/33] Remove debug code --- opencl_fdtd/simulation.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 940725a..087e9c4 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -261,10 +261,8 @@ class Simulation(object): args = OrderedDict() [args.update(d) for d in (base_fields, J_fields)] var_args = [ctype + ' ' + v for v in 'cs'] + ['uint ' + r + m for r in 'xyz' for m in ('min', 'max')] - print(var_args) update = ElementwiseKernel(self.context, operation=J_source, arguments=', '.join(list(args.keys()) + var_args)) - #print(len(args.values()),'\n\n', args.values(), args.keys()) self.update_J = lambda e, *a: update(*args.values(), *a, wait_for=e) From 9cf50b1d88f2745cbf8f8215e23ec0610de67b84 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Fri, 30 Nov 2018 00:36:32 -0800 Subject: [PATCH 28/33] Enable nonzero pml alpha --- opencl_fdtd/simulation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 087e9c4..d24dcf2 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -160,6 +160,7 @@ class Simulation(object): pml.setdefault('mu_eff', 1.0) pml.setdefault('ln_R_per_layer', -1.6) pml.setdefault('m', 3.5) + pml.setdefault('cfs_alpha', 0) pml.setdefault('ma', 1) ctype = type_to_C(self.arg_type) @@ -278,7 +279,7 @@ class Simulation(object): sigma_max = -pml['ln_R_per_layer'] / 2 * (pml['m'] + 1) kappa_max = numpy.sqrt(pml['mu_eff'] * pml['epsilon_eff']) - alpha_max = 0 # TODO: Nonzero alpha? + alpha_max = pml['cfs_alpha'] def par(x): scaling = ((x / (pml['thickness'])) ** pml['m']) From 60b70bb332d29fb5a2aecb801517be659cf4daf9 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Fri, 30 Nov 2018 00:37:07 -0800 Subject: [PATCH 29/33] Add restrict keyword to pointers (not sharing the same memory for multiple fields) --- opencl_fdtd/simulation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index d24dcf2..1e573c9 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -166,7 +166,7 @@ class Simulation(object): ctype = type_to_C(self.arg_type) def ptr(arg: str) -> str: - return ctype + ' *' + arg + return ctype + ' * restrict ' + arg base_fields = OrderedDict() base_fields[ptr('E')] = self.E From a76e741d325d26ff5968904ae12f4ed95effd313 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Fri, 30 Nov 2018 00:37:16 -0800 Subject: [PATCH 30/33] Minor spacing changes --- opencl_fdtd/simulation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 1e573c9..988e45b 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -321,7 +321,6 @@ class Simulation(object): arguments=', '.join(args.keys())) return lambda e: update(*args.values(), wait_for=e) - def _create_context(self, context: pyopencl.Context = None, queue: pyopencl.CommandQueue = None): if context is None: @@ -353,6 +352,7 @@ class Simulation(object): Exception('Initial field list elements must have same shape as epsilon elements') return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type)) + def type_to_C(float_type) -> str: """ Returns a string corresponding to the C equivalent of a numpy type. From 0f0f16e1846489ac1ab4c4126167c5b0b3fbb0fe Mon Sep 17 00:00:00 2001 From: jan Date: Fri, 30 Nov 2018 01:03:10 -0800 Subject: [PATCH 31/33] Minor formatting changes --- opencl_fdtd/kernels/update_h.cl | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl index ac75157..187a18a 100644 --- a/opencl_fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -52,10 +52,7 @@ if ({{r}} == s{{r}} - 1) { dE{{u ~ r}} = bloch_re * dE{{u ~ r}} + bloch_im * (F{{u}}[i + p{{u}}] - F{{u}}[i]); dE{{v ~ r}} = bloch_re * dE{{v ~ r}} + bloch_im * (F{{v}}[i + p{{v}}] - F{{v}}[i]); } -{%- endfor %} - - - +{% endfor -%} {%- if do_poynting %} @@ -93,7 +90,7 @@ ftype pHzi = 0; {%- set se, sh = '-', '+' -%} {%- else -%} {%- set se, sh = '+', '-' -%} - {%- endif -%} + {%- endif %} int pml_{{r ~ p}}_thickness = {{pml['thickness']}}; From 385e2859a9620cefe3536bd2eea9f5a051c2e9b2 Mon Sep 17 00:00:00 2001 From: jan Date: Fri, 30 Nov 2018 01:03:20 -0800 Subject: [PATCH 32/33] Update parameter descriptions --- opencl_fdtd/simulation.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 988e45b..9dd90c0 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -100,9 +100,13 @@ class Simulation(object): 'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6. 'm': Polynomial grading exponent. Default 3.5. 'ma': Exponent for alpha. Default 1. + :param bloch_boundaries: List of dicts with keys: + 'axis': One of 'x', 'y', 'z'. + 'real': Real part of bloch phase factor (i.e. real(exp(i * phase))) + 'imag': Imaginary part of bloch phase factor (i.e. imag(exp(i * phase))) :param dt: Time step. Default is min(dxes) * .99/sqrt(3). - :param initial_E: Initial E-field (default is 0 everywhere). Same format as epsilon. - :param initial_H: Initial H-field (default is 0 everywhere). Same format as epsilon. + :param initial_fields: Dict with optional keys ('E', 'H', 'F', 'G') containing initial values for the + specified fields (default is 0 everywhere). Fields have same format as epsilon. :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called. :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called. :param float_type: numpy.float32 or numpy.float64. Default numpy.float32. From 582dafbc2fefce0bcc38b524a715e99f9c5ac89b Mon Sep 17 00:00:00 2001 From: jan Date: Fri, 30 Nov 2018 01:04:00 -0800 Subject: [PATCH 33/33] Update example with bloch fields --- fdtd.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/fdtd.py b/fdtd.py index 08606c6..99af07b 100644 --- a/fdtd.py +++ b/fdtd.py @@ -128,7 +128,9 @@ def main(): # #### Create the simulation grid #### pmls = [{'axis': a, 'polarity': p, 'thickness': pml_thickness} for a in 'xyz' for p in 'np'] - sim = Simulation(grid.grids, do_poynting=True, pmls=pmls) + #bloch = [{'axis': a, 'real': 1, 'imag': 0} for a in 'x'] + bloch = [] + sim = Simulation(grid.grids, do_poynting=True, pmls=pmls, bloch_boundaries=bloch) # Source parameters and function w = 2 * numpy.pi * dx / wl @@ -142,22 +144,32 @@ def main(): with open('sources.c', 'w') as f: f.write(sim.sources['E']) - f.write('\n==========================================\n') + f.write('\n====================H======================\n') f.write(sim.sources['H']) if sim.update_S: - f.write('\n==========================================\n') + f.write('\n=====================S=====================\n') f.write(sim.sources['S']) + if bloch: + f.write('\n=====================F=====================\n') + f.write(sim.sources['F']) + f.write('\n=====================G=====================\n') + f.write(sim.sources['G']) # #### Run a bunch of iterations #### # event = sim.whatever([prev_event]) indicates that sim.whatever should be queued # immediately and run once prev_event is finished. start = time.perf_counter() for t in range(max_t): - sim.update_E([]).wait() + e = sim.update_E([]) + if bloch: + e = sim.update_F([e]) + e.wait() ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + numpy.prod(grid.shape) sim.E[ind] += field_source(t) e = sim.update_H([]) + if bloch: + e = sim.update_G([e]) if sim.update_S: e = sim.update_S([e]) e.wait()